Code
sshfs narecco@ant-login.linux.crg.es:/users/mirimia ~/mntLast code execution: 2024 February 07, Wednesday @ 21:02:47.
ChIP-seq analysis.
To fetch some of the inclusion tables (PSI data) stored on the CRG cluster I mount the server on my local computer with the following command in the terminal.
sshfs narecco@ant-login.linux.crg.es:/users/mirimia ~/mntwhich prompts my password and then allows me to navigate the cluster directories.
General info.
Load packages required for the analysis and suppress any message. Check the Section 5 section at the end for package versions.
library(dplyr, warn.conflicts = F, quietly = T)
library(readr)
library(readxl)
library(stringr)
library(tidyr)
library(tibble)
library(ggplot2)
library(ggforce)
library(ggsignif)
library(ggh4x)
library(ggrastr) # for rasterizing only the geom_point layer in a plot with many data-points
library(rstatix)
library(janitor)
library(DT)
library(BioVenn) # Venn Diagram
library(caroline) # Spie chart
library(patchwork)Define a plot style.
theme_classic(base_family = 'Arial', base_size = 6) +
theme(axis.text = element_text(colour = 'black'),
axis.ticks.length = unit(1, 'mm'),
axis.ticks = element_line(linewidth = 0.1),
axis.line = element_line(linewidth = 0.1),
axis.title = element_text(size = 5),
panel.grid.major = element_line(linewidth = 0.2, colour = 'gray84'),
strip.text = element_text(size = 10),
strip.background = element_blank(),
legend.title = element_blank(),
legend.key.size = unit(1, 'mm'),
legend.position = c(0.21, 0.92),
legend.box.background = element_blank(),
legend.margin = margin(t = -1, b = 1, unit = 'mm'),
plot.background = element_blank(),
panel.background = element_blank()) -> scatter_th
theme_classic(base_family = 'Arial', base_size = 6) +
theme(axis.text = element_text(colour = 'black'),
axis.text.x = element_text(angle = 45, hjust = 1, margin = margin(t = -0.4, unit = 'mm') ),
axis.title.x = element_blank(),
axis.ticks.length.y = unit(1, 'mm'),
axis.ticks = element_line(linewidth = 0.1),
axis.ticks.x = element_blank(),
axis.line = element_line(linewidth = 0.1),
axis.title = element_text(size = 5),
panel.grid.major.y = element_line(linewidth = 0.2, colour = 'gray84'),
panel.background = element_blank(),
panel.spacing = unit(3, 'mm'),
strip.text = element_text(size = 5),
strip.background = element_rect(linewidth = 0.1),
plot.background = element_blank() ) -> boxplot_th
theme_classic(base_family = 'Arial', base_size = 6) +
theme(axis.text = element_text(colour = 'black'),
axis.text.x = element_text(margin = margin(t = -0.3, unit = 'mm') ),
axis.ticks.length.y = unit(1, 'mm'),
axis.ticks = element_line(linewidth = 0.1),
axis.ticks.x = element_blank(),
axis.line = element_line(linewidth = 0.1),
axis.title = element_text(size = 5),
panel.grid.major = element_line(linewidth = 0.2, colour = 'gray84'),
panel.background = element_blank(),
panel.spacing = unit(3, 'mm'),
legend.position = c(0.93, 0.87),
legend.key.size = unit(1, 'mm'),
legend.title = element_blank(),
legend.box.background = element_blank(),
legend.margin = margin(t = -1, b = 1, unit = 'mm'),
strip.text = element_text(size = 5),
strip.background = element_rect(linewidth = 0.1),
plot.background = element_blank() ) -> profile_thFunction import GENEprofile files from SeqCode
# read "GENEprofile" file types from SeqCode
read_prfl <- function(path_dir, txt_name, epitope, geneset_type, sample_name) {
df_in <- read_delim(file = file.path(path_dir, txt_name),
delim = '\t', show_col_types = FALSE, col_names = F ) |>
setNames( c("MetaPosition", paste0(epitope, "_signal") ) ) |>
mutate(type = geneset_type, Sample = sample_name )
return(df_in)
}Here I organise all the file and directories- paths I need to run the analysis and define where to save the processed tables and figures.
oneDrive_Dir <- file.path("~/OneDrive - CRG - Centre de Regulacio Genomica/Suz12_AS_project")
code_dir_fig <- file.path(oneDrive_Dir, "_Code/Fig5")
tbl_dir_fig5 <- file.path(code_dir_fig, "tables")
pdf_dir_fig5 <- file.path(code_dir_fig, "pdfs")
if (!dir.exists(pdf_dir_fig5)) { dir.create(pdf_dir_fig5, recursive = T) }
if (!dir.exists(tbl_dir_fig5)) { dir.create(tbl_dir_fig5, recursive = T) }Path to DropBox folders where Enrique releases the documents.
dropbox_dir <- file.path('~/Dropbox (CRG ADV)/54_Suz12AS')
round2_h3k27me3_peaks_dir <- file.path(dropbox_dir, 'ROUND2_H3K27me3_ChIP/GENESETS/MACS1')
stopifnot(dir.exists(round2_h3k27me3_peaks_dir))
round3_suz12_peaks_dir <- file.path(dropbox_dir, 'ROUND3 - SUZ12 ChIP/files/genesets')
stopifnot(dir.exists(round3_suz12_peaks_dir))
round17_meta_dir <- file.path(dropbox_dir, 'ROUND17_Figure3/01_files_for_boxplotofmetaplots')
round17_scatter_dir <- file.path(dropbox_dir, 'ROUND17_Figure3/02_files_for_scattersboxplots')
stopifnot(dir.exists(round17_meta_dir))
stopifnot(dir.exists(round17_scatter_dir))
round21_meta_dir <- file.path(dropbox_dir, 'ROUND21_PcGSubUnitChIPseq/METAPLOTS/GENEPLOTS')
stopifnot(dir.exists(round21_meta_dir))
round23_scatter_dir <- file.path(dropbox_dir, 'ROUND23_FinalFiguresRevision/SCATTERPLOTS')
stopifnot(dir.exists(round23_scatter_dir))
round23_metagene_dir <- file.path(dropbox_dir, 'ROUND23_FinalFiguresRevision/METAPLOTS')
stopifnot(dir.exists(round23_metagene_dir))List all bed files to be used for the metagene profiles
bed_paths <- list.files(round17_meta_dir, recursive = T, pattern = "*.bed$")Path where the data to plot meta genes profiles for H3K27me3 are stored.
CGI_Dir <- file.path(oneDrive_Dir, '11_ChIP/Processed_data/Metaplots_CGI')
stopifnot(dir.exists(CGI_Dir))Import all 2.5 kb bins of H3K27me3.
all_bins25kb_K27me3 <- read_delim(file = file.path(round17_scatter_dir, 'H3K27me3_R3all_bins_2.5Kb_yes.txt'),
delim = '\t', progress = F,
col_names = c('bin', "WT", 'CSex4', 'dex4', 'KO'),
show_col_types = FALSE) |>
mutate(chr = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,1],
start = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,2],
end = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,3]) |>
relocate(chr, start, end, .after = bin) Import the annotated bins that belong to SUZ12 targeted CGI and those that contain a CGI that is not target
bins25kbK27me3_CGI_SUZ12 <- read_delim(
file = file.path(round17_scatter_dir, 'H3K27me3_R3all_bins_2.5Kb_yes_CGI-SUZ12.txt'),
delim = c(' '), col_names = c('bin', "garbage"), show_col_types = FALSE
)
bins25kbK27me3_other_CGI <- read_delim(
file = file.path(round17_scatter_dir, 'H3K27me3_R3all_bins_2.5Kb_yes_CGI-NOPCG.txt'),
delim = c(' '), col_names = c('bin', "garbage"), show_col_types = FALSE
)Check how many of the total bins are in each set (SUZ12 targeted CGIs or others CGIs).
table(all_bins25kb_K27me3$bin %in% bins25kbK27me3_CGI_SUZ12$bin)
FALSE TRUE
990222 4189
table(all_bins25kb_K27me3$bin %in% bins25kbK27me3_other_CGI$bin)
FALSE TRUE
981600 12811
Map the total bins in each set
all_bins25kb_K27me3 |>
mutate(type = case_when(bin %in% bins25kbK27me3_CGI_SUZ12$bin ~ "PcG targeted CGI",
bin %in% bins25kbK27me3_other_CGI$bin ~ "Other CGI") ) |>
mutate(type = ifelse(is.na(type), yes = "Rest of the genome", no = type )
) -> all_bins25kb_K27me3Set bin type as factor.
all_bins25kb_K27me3$type <- factor(all_bins25kb_K27me3$type,
levels = c("PcG targeted CGI", "Other CGI", "Rest of the genome") )Overview of bins file.
datatable(head(all_bins25kb_K27me3, 150), rownames = FALSE, filter = 'top',
options = list(pageLength = 5, autoWidth = TRUE) )Define the flanking bins (-1 and +1 bins) surrounding CGI.
indx_targeted <- which(all_bins25kb_K27me3$type == "PcG targeted CGI")
message(length(indx_targeted), ' PcG targeted bins')4189 PcG targeted bins
Get first the +1 bins:
If the difference in index number between the targeted bin and the + 1 is 1, meaning the 2 PcG targeted CGI are one next to each other, exclude the +1 bins. Otherwise, label every +1 index number as downstream flanking PcG targeted CGI.
plus_one_targeted <- c()
for (i in 1:(length(indx_targeted)-1) ) {
if ( (indx_targeted[i + 1] - indx_targeted[i]) == 1 ) {
# Do no nothing
plus_one_targeted[i] <- 0
} else {
plus_one_targeted[i] <- indx_targeted[i] + 1
}
}Here’s an example
message('Targeted indexes:\t', paste0(head(indx_targeted, 7), sep = ' ') )Targeted indexes: 983 994 2042 2295 2647 2648 2884
message('Flanking +1 indexes:\t', paste0(head(plus_one_targeted, 7), sep = ' ') )Flanking +1 indexes: 984 995 2043 2296 0 2649 2885
Since bins numbered 2647 and 2648 are next to each other, the bin numbered 2649 is selected and there’s a 0 for the 2648. By removing the zeros I remove the consecutive CGI bins.
plus_one_targeted <- plus_one_targeted[plus_one_targeted != 0]Get the -1 bins: here the logic is the same as above, but inverted. If the difference in index number between the targeted bin and the -1 bin is 1, meaning the 2 PcG targeted CGI are one next to each other, exclude the -1 bins.
minus_one_targeted <- c()
for (i in 2:(length(indx_targeted)) ) {
if ( ( indx_targeted[i] - indx_targeted[i - 1] ) == 1 ) {
# Do no nothing
minus_one_targeted[i] <- 0
} else {
minus_one_targeted[i] <- indx_targeted[i] - 1
}
}Remove first NA, and consecutive bins defined by zeros.
minus_one_targeted[which( is.na(minus_one_targeted) )] <- indx_targeted[1] - 1
minus_one_targeted <- minus_one_targeted[minus_one_targeted != 0]Check binning
message('Targeted indexes:\t', paste0(head(indx_targeted, 7), sep = ' ') )Targeted indexes: 983 994 2042 2295 2647 2648 2884
message('Flanking -1 indexes:\t', paste0(head(minus_one_targeted, 7), sep = ' ') )Flanking -1 indexes: 982 993 2041 2294 2646 2883 4248
Since bins numbered 2647 and 2648 are next to each other, the bin numbered 2646 is selected and there’s a 0 for the 2647. By removing the zeros I remove the consecutive CGI bins.
Check that there’s no overlap between the targeted and the flanking regions.
table(plus_one_targeted %in% indx_targeted)
FALSE
2890
table(minus_one_targeted %in% indx_targeted)
FALSE
2891
Check that some flanking bins can be both upstream or downstream.
table(minus_one_targeted %in% plus_one_targeted)
FALSE TRUE
2694 197
Create a new bin type variable called type2.
all_bins25kb_K27me3$type2 <- NA
all_bins25kb_K27me3[ indx_targeted, ]$type2 <- "PcG targeted CGI"
all_bins25kb_K27me3[ plus_one_targeted, ]$type2 <- "PcG targeted CGI + 1"
all_bins25kb_K27me3[ minus_one_targeted, ]$type2 <- "PcG targeted CGI - 1"
all_bins25kb_K27me3[ is.na(all_bins25kb_K27me3$type2), ]$type2 <- "Rest of the genome"
all_bins25kb_K27me3$type2 <- factor(all_bins25kb_K27me3$type2,
levels = c(
"PcG targeted CGI",
"PcG targeted CGI + 1",
"PcG targeted CGI - 1",
"Rest of the genome") )
table(all_bins25kb_K27me3$type2)
PcG targeted CGI PcG targeted CGI + 1 PcG targeted CGI - 1
4189 2693 2891
Rest of the genome
984638
Log10 transform the bins signal.
all_bins25kb_K27me3 |>
column_to_rownames(var = "bin") |>
select(chr, start, end, WT, CSex4, dex4, KO, type, type2) |>
mutate( across( where(is.double), log10 ) ) |>
# arrange is used to favour the CGI point to appear later in the plot and therefore be above the other points.
arrange( desc(type2) ) -> log_bins25kb_K27me3Check total number of bin.
num_all_bins_2.5kb <- nrow(log_bins25kb_K27me3)
message("Number of bins of 2.5kb width to plot: ", num_all_bins_2.5kb)Number of bins of 2.5kb width to plot: 994411
Individual bin types numbers:
targeted_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="PcG targeted CGI"]
plus_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="PcG targeted CGI + 1"]
minus_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="PcG targeted CGI - 1"]
flanking_nBins <- sum(plus_nBins, minus_nBins)
ROTG_nBins <- table(log_bins25kb_K27me3$type2)[names(table(log_bins25kb_K27me3$type2))=="Rest of the genome"]
message("Bins containing a targeted CGI: ", targeted_nBins, "\n",
"Bins flanking a targeted CGI: ", flanking_nBins, "\n",
"Bins in the rest of genome: ", ROTG_nBins)Bins containing a targeted CGI: 4189
Bins flanking a targeted CGI: 5584
Bins in the rest of genome: 984638
Fit a linear model between different conditions.
intercet_wt_dex4 <- coef( lm(log_bins25kb_K27me3$dex4 ~ log_bins25kb_K27me3$WT ) )[1]
slope_wt_dex4 <- coef( lm(log_bins25kb_K27me3$dex4 ~ log_bins25kb_K27me3$WT ) )[2] If I were to plot all bins signal as a scatter plot of WT vs ∆ex4 I would do something like this:
log_bins25kb_K27me3 |>
ggplot() +
aes(x = WT, y = dex4, colour = type2) +
# facet_wrap(~type2, scales = 'fixed') +
geom_point(size = 0.75) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_dex4, intercept = intercet_wt_dex4, linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5) ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"PcG targeted CGI" = '#A2234C',
"Rest of the genome" = '#72817F') ) +
labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized ∆ex4 H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
scatter_th -> pBins_WT_vs_dex4_All_Targeted
# do not plot cause it's 2 many points
# pBins_WT_vs_dex4_All_Targeted
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5B_CGI_coloured_WT_vs_Dex4_bins_n", num_all_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_dex4_All_Targeted, device = cairo_pdf, units = "cm",
width = 4.5, height = 4.5)However that breaks the RStudio graphic engine.
To overcome some of the overplotting I remove points that are identical by type2 value. If 2 point have the exact same values, the duplicated point is removed from the dataframe. This helps removing points that are going to be plotted on top of each other, making the plot more manageable. This operation requires the use of the dataframe row names.
Start by removing the many “rest of the genome” bins
indx_dups_ROTG_wt_dex4 <-
which(duplicated(log_bins25kb_K27me3[log_bins25kb_K27me3$type2 == "Rest of the genome" , c("WT", "dex4")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3[indx_dups_ROTG_wt_dex4, ])
wt_dex4_bins <- log_bins25kb_K27me3[!rownames(log_bins25kb_K27me3) %in% row_names_dups_ROTG,]Then removing the “PcG targeted CGI”
indx_dups_PTC_wt_dex4 <- which(duplicated(wt_dex4_bins[wt_dex4_bins$type2 == "PcG targeted CGI", c("WT", "dex4")] ) )
row_names_dups_PTC <- rownames(wt_dex4_bins[indx_dups_PTC_wt_dex4, ])
wt_dex4_bins <- wt_dex4_bins[!rownames(wt_dex4_bins) %in% row_names_dups_PTC,]Lastly the +1 and -1 bins
indx_dups_P1M1_wt_dex4 <- which(duplicated(wt_dex4_bins[wt_dex4_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT", "dex4")] ) )
row_names_dups_P1M1 <- rownames(wt_dex4_bins[indx_dups_P1M1_wt_dex4, ])
wt_dex4_bins <- wt_dex4_bins[!rownames(wt_dex4_bins) %in% row_names_dups_P1M1,]Re-calculate bin numbers now after filtering
num_fltrd_bins_2.5kb <- nrow(wt_dex4_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)Number of bins of 2.5kb width to plot: 16662
Plot figure 5C.
ggplot(wt_dex4_bins) +
aes(x = WT, y = dex4, colour = type2) +
# facet_wrap(~type2, scales = 'fixed') +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_dex4, intercept = intercet_wt_dex4, linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized ∆ex4 H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th -> pBins_WT_vs_dex4_Filtered_TargetedSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5C_CGI_coloured_WT_vs_Dex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_dex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)The scatter plot contains too many points to be efficiently plotted. For an effective scatter plot visualisation I rasterise only the specific points layer of the ggplot2 plot and keep all other labels and text in vector format.
rBins_WT_vs_dex4_Filtered_Targeted <- rasterize(pBins_WT_vs_dex4_Filtered_Targeted, layers = 'Point', dpi = 400)Plot rasterised scatter plot for figure 5C
rBins_WT_vs_dex4_Filtered_TargetedSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5C_RASTERIZED_CGI_coloured_WT_vs_Dex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_dex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Now do the same but for CSex4 vs WT.
Get linear fit.
intercet_wt_CSex4 <- coef( lm(log_bins25kb_K27me3$CSex4 ~ log_bins25kb_K27me3$WT ) )[1]
slope_wt_CSex4 <- coef( lm(log_bins25kb_K27me3$CSex4 ~ log_bins25kb_K27me3$WT ) )[2] Filter the bins like before. Start by removing the many “rest of the genome” bins
indx_dups_ROTG_wt_CSex4 <- which(duplicated(log_bins25kb_K27me3[log_bins25kb_K27me3$type2 == "Rest of the genome", c("WT", "CSex4")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3[indx_dups_ROTG_wt_CSex4, ])
wt_CSex4_bins <- log_bins25kb_K27me3[!rownames(log_bins25kb_K27me3) %in% row_names_dups_ROTG,]Filter the “PcG targeted CGI” in CSex4 vs WT dataset
indx_dups_PTC_wt_CSex4 <- which(duplicated(wt_CSex4_bins[wt_CSex4_bins$type2 == "PcG targeted CGI", c("WT", "CSex4")] ) )
row_names_dups_PTC <- rownames(wt_CSex4_bins[indx_dups_PTC_wt_CSex4, ])
wt_CSex4_bins <- wt_CSex4_bins[!rownames(wt_CSex4_bins) %in% row_names_dups_PTC,]And the +1 and -1 flanking bins
indx_dups_P1M1_wt_CSex4 <- which(duplicated(wt_CSex4_bins[wt_CSex4_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT", "CSex4")] ) )
row_names_dups_P1M1 <- rownames(wt_CSex4_bins[indx_dups_P1M1_wt_CSex4, ])
wt_CSex4_bins <- wt_CSex4_bins[!rownames(wt_CSex4_bins) %in% row_names_dups_P1M1,]Check number of bins after filtering
num_fltrd_bins_2.5kb <- nrow(wt_CSex4_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)Number of bins of 2.5kb width to plot: 16189
Basically this filtering removes duplicated points.
ggplot(wt_CSex4_bins) +
aes(x = WT, y = CSex4, colour = type2) +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_CSex4, intercept = intercet_wt_CSex4,
linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized CSex4 H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th -> pBins_WT_vs_CSex4_Filtered_TargetedSave to pdf vector.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5B_CGI_coloured_WT_vs_CSex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_CSex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise only the point layer of ggplot
rBins_WT_vs_CSex4_Filtered_Targeted <- rasterize(pBins_WT_vs_CSex4_Filtered_Targeted, layers = 'Point', dpi = 400)Show plot as presented in figure 5B.
rBins_WT_vs_CSex4_Filtered_TargetedSave to pdf rasterised scatter plot.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5B_RASTERIZED_CGI_coloured_WT_vs_CSex4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_CSex4_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Here I repeat the same analysis but for the SUZ12-KO isoform specific rescue cell lines.
all_bins25kb_K27me3_rescues <- read_delim(
file = file.path(round23_scatter_dir, 'H3K27me3_R3all--W2-S1-S3-L4-L6-L3D2-S3D2_bins_2.5Kb_yes.txt'),
delim = ' ', progress = F,
col_names = c('bin', 'WT', 'CSex4', 'dex4', 'KO',
'WT2', 'KOrS1', 'KOrS3', 'KOrL4', 'KOrL6', 'KOrL3D2', 'KOrS3D2'),
show_col_types = FALSE) |>
mutate(chr = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,1],
start = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,2],
end = str_split_fixed(pattern = "\\*", string = bin, n = 3)[,3]) |>
relocate(chr, start, end, .after = bin) The samples 'WT2', 'KOrS1', 'KOrS3', 'KOrL4', 'KOrL6', 'KOrL3D2', 'KOrS3D2' were done after in a second round compared to the first round of samples ('WT', 'CSex4', 'dex4', 'KO').
Check how many bins fall within a SUZ12 CGIs or other CGI
table(all_bins25kb_K27me3_rescues$bin %in% bins25kbK27me3_CGI_SUZ12$bin)
FALSE TRUE
987212 4189
table(all_bins25kb_K27me3_rescues$bin %in% bins25kbK27me3_other_CGI$bin)
FALSE TRUE
978601 12800
Annotate rescues bins that fall in targeted CGIs.
all_bins25kb_K27me3_rescues |>
mutate(type = case_when(bin %in% bins25kbK27me3_CGI_SUZ12$bin ~ "PcG targeted CGI",
bin %in% bins25kbK27me3_other_CGI$bin ~ "Other CGI") ) |>
mutate(type = ifelse(is.na(type), yes = "Rest of the genome", no = type )
) -> all_bins25kb_K27me3_rescuesall_bins25kb_K27me3_rescues$type <- factor(all_bins25kb_K27me3_rescues$type,
levels = c("PcG targeted CGI", "Other CGI", "Rest of the genome") )Same as before identify the CGI flanking bins.
First the downstream +1 bins.
indx_targeted <- which(all_bins25kb_K27me3_rescues$type == "PcG targeted CGI")
plus_one_targeted <- c()
for (i in 1:(length(indx_targeted)-1) ) {
if ( (indx_targeted[i + 1] - indx_targeted[i]) == 1 ) {
# Do no nothing
plus_one_targeted[i] <- 0
} else {
plus_one_targeted[i] <- indx_targeted[i] + 1
}
}
plus_one_targeted <- plus_one_targeted[plus_one_targeted != 0]Then the upstream -1 bins
minus_one_targeted <- c()
for (i in 2:(length(indx_targeted)) ) {
if ( ( indx_targeted[i] - indx_targeted[i - 1] ) == 1 ) {
# Do no nothing
minus_one_targeted[i] <- 0
} else {
minus_one_targeted[i] <- indx_targeted[i] - 1
}
}
minus_one_targeted[which( is.na(minus_one_targeted) )] <- indx_targeted[1] - 1
minus_one_targeted <- minus_one_targeted[minus_one_targeted != 0]Create a new bin type variable called type2 also for rescue data.
all_bins25kb_K27me3_rescues$type2 <- NA
all_bins25kb_K27me3_rescues[ indx_targeted, ]$type2 <- "PcG targeted CGI"
all_bins25kb_K27me3_rescues[ plus_one_targeted, ]$type2 <- "PcG targeted CGI + 1"
all_bins25kb_K27me3_rescues[ minus_one_targeted, ]$type2 <- "PcG targeted CGI - 1"
all_bins25kb_K27me3_rescues[ is.na(all_bins25kb_K27me3_rescues$type2), ]$type2 <- "Rest of the genome"
all_bins25kb_K27me3_rescues$type2 <- factor(all_bins25kb_K27me3_rescues$type2,
levels = c(
"PcG targeted CGI",
"PcG targeted CGI + 1",
"PcG targeted CGI - 1",
"Rest of the genome") )
table(all_bins25kb_K27me3_rescues$type2)
PcG targeted CGI PcG targeted CGI + 1 PcG targeted CGI - 1
4189 2693 2890
Rest of the genome
981629
Log10 transform the bins signal in rescue data.
all_bins25kb_K27me3_rescues |>
column_to_rownames(var = "bin") |>
select(chr, start, end, WT2, KOrS1, KOrS3, KOrL4, KOrL6, KOrL3D2, KOrS3D2, type, type2) |>
mutate( across( where(is.double), log10 ) ) |>
# arrange is used to favour the CGI point to appear later in the plot and therefore be above the other points.
arrange( desc(type2) ) -> log_bins25kb_K27me3_rescueCheck total number of bin in rescues.
num_all_bins_2.5kb <- nrow(log_bins25kb_K27me3_rescue)
message("Number of bins of 2.5kb width to plot: ", num_all_bins_2.5kb)Number of bins of 2.5kb width to plot: 991401
Individual bin types numbers in rescues:
targeted_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="PcG targeted CGI"]
plus_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="PcG targeted CGI + 1"]
minus_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="PcG targeted CGI - 1"]
flanking_nBins <- sum(plus_nBins, minus_nBins)
ROTG_nBins <- table(log_bins25kb_K27me3_rescue$type2)[names(table(log_bins25kb_K27me3_rescue$type2))=="Rest of the genome"]
message("Bins containing a targeted CGI: ", targeted_nBins, "\n",
"Bins flanking a targeted CGI: ", flanking_nBins, "\n",
"Bins in the rest of genome: ", ROTG_nBins)Bins containing a targeted CGI: 4189
Bins flanking a targeted CGI: 5583
Bins in the rest of genome: 981629
Fit a linear model between different conditions.
intercet_wt_KOrS1 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS1 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1]
slope_wt_KOrS1 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS1 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2]
intercet_wt_KOrS3 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1]
slope_wt_KOrS3 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] To overcome some of the overplotting I remove points that are identical by type2 value. If 2 point have the exact same values, the duplicated point is removed from the dataframe.
Start by removing the many “rest of the genome” bins
indx_dups_ROTG_wt_KOrS1 <-
which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome" , c("WT2", "KOrS1")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrS1, ])
wt_KOrS1_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]Then removing the “PcG targeted CGI”
indx_dups_PTC_wt_KOrS1 <- which(duplicated(wt_KOrS1_bins[wt_KOrS1_bins$type2 == "PcG targeted CGI", c("WT2", "KOrS1")] ) )
row_names_dups_PTC <- rownames(wt_KOrS1_bins[indx_dups_PTC_wt_KOrS1, ])
wt_KOrS1_bins <- wt_KOrS1_bins[!rownames(wt_KOrS1_bins) %in% row_names_dups_PTC,]Lastly the +1 and -1 bins
indx_dups_P1M1_wt_KOrS1 <- which(duplicated(wt_KOrS1_bins[wt_KOrS1_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrS1")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrS1_bins[indx_dups_P1M1_wt_KOrS1, ])
wt_KOrS1_bins <- wt_KOrS1_bins[!rownames(wt_KOrS1_bins) %in% row_names_dups_P1M1,]Re-calculate bin numbers now after filtering
num_fltrd_bins_2.5kb <- nrow(wt_KOrS1_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)Number of bins of 2.5kb width to plot: 16948
Plot figure 5E
ggplot(wt_KOrS1_bins) +
aes(x = WT2, y = KOrS1, colour = type2) +
# facet_wrap(~type2, scales = 'fixed') +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_KOrS1, intercept = intercet_wt_KOrS1, linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized KO+S H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th -> pBins_WT_vs_KOrS1_Filtered_Targeted
pBins_WT_vs_KOrS1_Filtered_TargetedSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5E_CGI_coloured_WT_vs_KOrS1_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_KOrS1_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise the points to 400dpi
rBins_WT_vs_KOrS1_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrS1_Filtered_Targeted, layers = 'Point', dpi = 400)Plot rasterised scatter plot for figure 3B
rBins_WT_vs_KOrS1_Filtered_TargetedSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5E_RASTERIZED_CGI_coloured_WT_vs_KOrS1_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_KOrS1_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Now for the KO+L (KOrL4).
Get linear fit.
intercet_wt_KOrL4 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL4 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1]
slope_wt_KOrL4 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL4 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2]
intercet_wt_KOrL6 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL6 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1]
slope_wt_KOrL6 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL6 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] Check linear fit parameters
message("WT vs KO+L4: intercept = ", round(intercet_wt_KOrL4, 3), " slope = ", round(slope_wt_KOrL4, 3))WT vs KO+L4: intercept = 0 slope = 0.824
message("WT vs KO+L6: intercept = ", round(intercet_wt_KOrL6, 3), " slope = ", round(slope_wt_KOrL6, 3))WT vs KO+L6: intercept = 0.222 slope = 0.772
Filter the bins like before. Start by removing the many “rest of the genome” bins
indx_dups_ROTG_wt_KOrL4 <- which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome", c("WT2", "KOrL4")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrL4, ])
wt_KOrL4_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]Filter the “PcG targeted CGI” in KO rescue Long clone 4 vs WT dataset
indx_dups_PTC_wt_KOrL4 <- which(duplicated(wt_KOrL4_bins[wt_KOrL4_bins$type2 == "PcG targeted CGI", c("WT2", "KOrL4")] ) )
row_names_dups_PTC <- rownames(wt_KOrL4_bins[indx_dups_PTC_wt_KOrL4, ])
wt_KOrL4_bins <- wt_KOrL4_bins[!rownames(wt_KOrL4_bins) %in% row_names_dups_PTC,]And the +1 and -1 flanking bins
indx_dups_P1M1_wt_KOrL4 <- which(duplicated(wt_KOrL4_bins[wt_KOrL4_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrL4")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrL4_bins[indx_dups_P1M1_wt_KOrL4, ])
wt_KOrL4_bins <- wt_KOrL4_bins[!rownames(wt_KOrL4_bins) %in% row_names_dups_P1M1,]Check number of bins after filtering:
num_fltrd_bins_2.5kb <- nrow(wt_KOrL4_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)Number of bins of 2.5kb width to plot: 14071
Plot figure 5D
ggplot(wt_KOrL4_bins) +
aes(x = WT2, y = KOrL4, colour = type2) +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_KOrL4, intercept = intercet_wt_KOrL4,
linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized KO+L H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th -> pBins_WT_vs_KOrL4_Filtered_Targeted
pBins_WT_vs_KOrL4_Filtered_TargetedSave to pdf vector.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5D_CGI_coloured_WT_vs_KOrL4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_KOrL4_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise only the point layer of ggplot
rBins_WT_vs_KOrL4_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrL4_Filtered_Targeted, layers = 'Point', dpi = 400)Show plot as presented in figure.
rBins_WT_vs_KOrL4_Filtered_TargetedSave to pdf rasterised scatter plot.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5D_RASTERIZED_CGI_coloured_WT_vs_KOrL4_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_KOrL4_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)SUZ12-S 3D mutant H3K27me3 scatter plot.
Get linear fit.
intercet_wt_KOrS3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1]
slope_wt_KOrS3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrS3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] Check linear fit parameters
message("WT2 vs KOrS3D2: intercept = ", round(intercet_wt_KOrS3D2, 3),
" slope = ", round(slope_wt_KOrS3D2, 3))WT2 vs KOrS3D2: intercept = 0.173 slope = 0.809
Filter the bins like before. Start by removing the many “rest of the genome” bins
indx_dups_ROTG_wt_KOrS3D2 <- which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome", c("WT2", "KOrS3D2")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrS3D2, ])
wt_KOrS3D2_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]Filter the “PcG targeted CGI” in KO rescue SUZ12-Short 3D mutant vs WT dataset
indx_dups_PTC_wt_KOrS3D2 <- which(duplicated(wt_KOrS3D2_bins[wt_KOrS3D2_bins$type2 == "PcG targeted CGI", c("WT2", "KOrS3D2")] ) )
row_names_dups_PTC <- rownames(wt_KOrS3D2_bins[indx_dups_PTC_wt_KOrS3D2, ])
wt_KOrS3D2_bins <- wt_KOrS3D2_bins[!rownames(wt_KOrS3D2_bins) %in% row_names_dups_PTC,]And the +1 and -1 flanking bins
indx_dups_P1M1_wt_KOrS3D2 <- which(duplicated(wt_KOrS3D2_bins[wt_KOrS3D2_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrS3D2")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrS3D2_bins[indx_dups_P1M1_wt_KOrS3D2, ])
wt_KOrS3D2_bins <- wt_KOrS3D2_bins[!rownames(wt_KOrS3D2_bins) %in% row_names_dups_P1M1,]Check number of bins after filtering:
num_fltrd_bins_2.5kb <- nrow(wt_KOrS3D2_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)Number of bins of 2.5kb width to plot: 12240
Plot figure S5G
ggplot(wt_KOrS3D2_bins) +
aes(x = WT2, y = KOrS3D2, colour = type2) +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_KOrS3D2, intercept = intercet_wt_KOrS3D2,
linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT2 H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized KO+S-3D H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th -> pBins_WT_vs_KOrS3D2_Filtered_Targeted
pBins_WT_vs_KOrS3D2_Filtered_TargetedSave to pdf vector.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5G_CGI_coloured_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_KOrS3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise only the point layer of ggplot
rBins_WT_vs_KOrS3D2_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrS3D2_Filtered_Targeted,
layers = 'Point', dpi = 400)Show plot as presented in figure.
rBins_WT_vs_KOrS3D2_Filtered_TargetedSave to pdf rasterised scatter plot.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5G_RASTERIZED_CGI_coloured_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_KOrS3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Show the same plot but without the log10 scaling of the data.
ggplot(wt_KOrS3D2_bins) +
aes(x = 10^WT2, y = 10^KOrS3D2, colour = type2) +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_KOrS3D2, intercept = intercet_wt_KOrS3D2,
linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 150), ylim = c(0, 150), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT#2 H3K27me3 signal" ),
y = expression("Spike-in normalized KO+S-3D H3K27me3 signal" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th +
theme(legend.position = c(0.3, 0.92) ) -> pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10
pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10Save to pdf vector.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5G_CGI_NoLog10_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise only the point layer of ggplot
rBins_WT_vs_KOrS3D2_NoLog10 <- rasterize(pBins_WT_vs_KOrS3D2_Filtered_Targeted_NOLOG10,
layers = 'Point', dpi = 400)Show plot as presented in figure.
rBins_WT_vs_KOrS3D2_NoLog10Save to pdf rasterised scatter plot.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5G_RASTERIZED_CGI_NoLog10_WT_vs_KOrS3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_KOrS3D2_NoLog10, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)SUZ12-L 3D mutant H3K27me3 scatter plot.
Get linear fit.
intercet_wt_KOrL3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[1]
slope_wt_KOrL3D2 <- coef( lm(log_bins25kb_K27me3_rescue$KOrL3D2 ~ log_bins25kb_K27me3_rescue$WT2 ) )[2] Check linear fit parameters
message("WT vs KO+L-3D: intercept = ", round(intercet_wt_KOrL3D2, 3),
" slope = ", round(slope_wt_KOrL3D2, 3))WT vs KO+L-3D: intercept = 0.444 slope = 0.751
Filter the bins like before. Start by removing the many “rest of the genome” bins
indx_dups_ROTG_wt_KOrL3D2 <- which(duplicated(log_bins25kb_K27me3_rescue[log_bins25kb_K27me3_rescue$type2 == "Rest of the genome", c("WT2", "KOrL3D2")]))
row_names_dups_ROTG <- rownames(log_bins25kb_K27me3_rescue[indx_dups_ROTG_wt_KOrL3D2, ])
wt_KOrL3D2_bins <- log_bins25kb_K27me3_rescue[!rownames(log_bins25kb_K27me3_rescue) %in% row_names_dups_ROTG,]Filter the “PcG targeted CGI” in KO rescue SUZ12-Long 3D mutant vs WT dataset
indx_dups_PTC_wt_KOrL3D2 <- which(duplicated(wt_KOrL3D2_bins[wt_KOrL3D2_bins$type2 == "PcG targeted CGI", c("WT2", "KOrL3D2")] ) )
row_names_dups_PTC <- rownames(wt_KOrL3D2_bins[indx_dups_PTC_wt_KOrL3D2, ])
wt_KOrL3D2_bins <- wt_KOrL3D2_bins[!rownames(wt_KOrL3D2_bins) %in% row_names_dups_PTC,]And the +1 and -1 flanking bins
indx_dups_P1M1_wt_KOrL3D2 <- which(duplicated(wt_KOrL3D2_bins[wt_KOrL3D2_bins$type2 %in% c("PcG targeted CGI + 1", "PcG targeted CGI + 1"), c("WT2", "KOrL3D2")] ) )
row_names_dups_P1M1 <- rownames(wt_KOrL3D2_bins[indx_dups_P1M1_wt_KOrS3D2, ])
wt_KOrL3D2_bins <- wt_KOrL3D2_bins[!rownames(wt_KOrL3D2_bins) %in% row_names_dups_P1M1,]Check number of bins after filtering:
num_fltrd_bins_2.5kb <- nrow(wt_KOrL3D2_bins)
message("Number of bins of 2.5kb width to plot: ", num_fltrd_bins_2.5kb)Number of bins of 2.5kb width to plot: 14481
Plot figure S5F
ggplot(wt_KOrL3D2_bins) +
aes(x = WT2, y = KOrL3D2, colour = type2) +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_KOrL3D2, intercept = intercet_wt_KOrL3D2,
linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 3.5), ylim = c(0, 3.5), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT#2 H3K27me3 signal (" ~ log[10] ~ ")" ),
y = expression("Spike-in normalized KO+L-3D H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th -> pBins_WT_vs_KOrL3D2_Filtered_Targeted
pBins_WT_vs_KOrL3D2_Filtered_TargetedSave to pdf vector.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5F_CGI_coloured_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_KOrL3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise only the point layer of ggplot
rBins_WT_vs_KOrL3D2_Filtered_Targeted <- rasterize(pBins_WT_vs_KOrL3D2_Filtered_Targeted,
layers = 'Point', dpi = 400)Show plot as presented in figure.
rBins_WT_vs_KOrL3D2_Filtered_TargetedSave to pdf rasterised scatter plot.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5F_RASTERIZED_CGI_coloured_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_KOrL3D2_Filtered_Targeted, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Show the same plot but without the log10 scaling of the data.
ggplot(wt_KOrL3D2_bins) +
aes(x = 10^WT2, y = 10^KOrL3D2, colour = type2) +
geom_point(size = 0.75, alpha = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linewidth = 0.2) +
geom_abline(slope = slope_wt_KOrS3D2, intercept = intercet_wt_KOrS3D2,
linetype = 'dashed', linewidth = 0.2) +
coord_fixed(ratio = 1, xlim = c(0, 150), ylim = c(0, 150), clip = 'on' ) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0, 0.5) ) ) +
scale_colour_manual(values = c("PcG targeted CGI" = '#A2234C',
"PcG targeted CGI - 1" = '#203059',
"PcG targeted CGI + 1" = '#203059',
"Rest of the genome" = '#72817F'),
breaks = c("PcG targeted CGI",
"PcG targeted CGI + 1",
"Rest of the genome"),
labels = c("PcG targeted CGI" = "Targeted",
"PcG targeted CGI + 1" = "Flanking",
"Rest of the genome" = "Rest of genome") ) +
labs(x = expression("Spike-in normalized WT#2 H3K27me3 signal" ),
y = expression("Spike-in normalized KO+L-3D H3K27me3 signal" ) ) +
guides(colour = guide_legend(override.aes = list(alpha = 0.99))) +
scatter_th +
theme(legend.position = c(0.27, 0.92) ) -> pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10
pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10Save to pdf vector.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5F_CGI_NoLog10_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Rasterise only the point layer of ggplot
rBins_WT_vs_KOrL3D2_NoLog10 <- rasterize(pBins_WT_vs_KOrL3D2_Filtered_Targeted_NOLOG10,
layers = 'Point', dpi = 400)Show plot as presented in figure.
rBins_WT_vs_KOrL3D2_NoLog10Save to pdf rasterised scatter plot.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5F_RASTERIZED_CGI_NoLog10_WT_vs_KOrL3D2_bins_n", num_fltrd_bins_2.5kb, ".pdf"),
plot = rBins_WT_vs_KOrL3D2_NoLog10, device = cairo_pdf, units = "cm",
width = 4.2, height = 4.2)Display the same H3K27me3 signal in 2.5kb genomic bins as boxplots.
Code below is needed to colour the ggplot2 facet strip text.
# requires package ggh4x
CGI_coloured_strip <-
strip_themed(background_x = elem_list_rect(fill = c(
'#203059', '#A2234C', '#203059', '#72817F'
)),
text_x = elem_list_text(colour = c('white')))
strip_CGI_conversion <- c(
`PcG targeted CGI - 1` = "Flanking up",
`PcG targeted CGI` = "Targeted CGI",
`PcG targeted CGI + 1` = "Flanking down",
`Rest of the genome` = "Rest of genome"
)Reshape the data for plotting.
log_bins25kb_K27me3 |>
as_tibble() |>
pivot_longer(cols = c("WT", 'CSex4', 'dex4', 'KO'), names_to = 'Sample', values_to = "log10_H3K27me3") |>
mutate(Sample = factor(Sample, levels = c("WT", 'CSex4', 'dex4', 'KO') ) ) |>
mutate(type2 = factor(type2, levels = c('PcG targeted CGI - 1', 'PcG targeted CGI', 'PcG targeted CGI + 1', 'Rest of the genome') ) ) -> endogenous_bins
log_bins25kb_K27me3_rescue |>
as_tibble() |>
pivot_longer(cols = c("WT2", 'KOrL4', 'KOrL6', 'KOrS1', 'KOrS3', 'KOrL3D2', 'KOrS3D2'),
names_to = 'Sample', values_to = "log10_H3K27me3") |>
mutate(Sample = factor(Sample, levels = c("WT2", 'KOrL4', 'KOrL6', 'KOrS1', 'KOrS3', 'KOrL3D2', 'KOrS3D2') ) ) |>
mutate(type2 = factor(type2,
levels = c('PcG targeted CGI - 1',
'PcG targeted CGI',
'PcG targeted CGI + 1',
'Rest of the genome') )
) -> rescue_binsPlot boxplot for panel 5F without the KO rescue lines
ggplot(endogenous_bins) +
aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
facet_wrap2(~type2, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip,
labeller = labeller(type2 = strip_CGI_conversion ) ) +
geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
scale_x_discrete(labels = c('dex4' = '∆ex4') ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
scale_fill_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120",
'dex4' = "#F7CB48",'KO' = "#728189") ) +
labs(y = expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
boxplot_th -> pBoxPlot_CGI_quant_endogenous
pBoxPlot_CGI_quant_endogenousSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_lines.pdf"),
plot = pBoxPlot_CGI_quant_endogenous, device = cairo_pdf, units = "cm",
width = 7.3, height = 5)Now same plot with rescue lines
ggplot(rescue_bins) +
aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
facet_wrap2(~type2, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip,
labeller = labeller(type2 = strip_CGI_conversion ) ) +
geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
scale_x_discrete(labels = c('WT2' = 'WT', 'KOrL4' = 'KO+L#1',
'KOrL6' = 'KO+L#2', 'KOrS1' = 'KO+S#1',
'KOrS3' = 'KO+S#2',
'KOrL3D2' = 'KO+L-3D', 'KOrS3D2' = 'KO+S-3D') ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.02), add = 0),
n.breaks = 6) +
coord_cartesian(ylim = c(0, 2.25), clip = 'on') +
scale_fill_manual(values = c('WT2' = "#377AA3",
"KOrL4" = "mediumpurple2", "KOrL6" = "mediumpurple3",
"KOrS1" = "darkorange1", "KOrS3" = "darkorange2",
"KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC")
) +
labs(y = expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
boxplot_th -> pBoxPlot_CGI_quant_rescues
pBoxPlot_CGI_quant_rescuesSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5F_2.5kb_Bins_Boxplot_rescue_lines.pdf"),
plot = pBoxPlot_CGI_quant_rescues, device = cairo_pdf, units = "cm",
width = 7.5, height = 5)Combine the 2 into one plot:
rbind(endogenous_bins, rescue_bins) |>
subset(! Sample %in% c('KOrL6', 'KOrS3') ) |> #'WT2',
ggplot() +
aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
facet_wrap2(~type2, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip,
labeller = labeller(type2 = strip_CGI_conversion ) ) +
geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
scale_x_discrete(labels = c('dex4' = '∆ex4', 'KOrL4' = 'KO+L',
'KOrS1' = 'KO+S', 'KOrL3D2' = 'KO+L-3D',
'KOrS3D2' = 'KO+S-3D') ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
scale_fill_manual(values = c('WT' = "#377AA3", 'WT2' = "#377AA3", 'CSex4' = "#B65120",
'dex4' = "#F7CB48",'KO' = "#728189",
"KOrL4" = "mediumpurple2", "KOrS1" = "darkorange1",
"KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC") ) +
labs(y = expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
boxplot_th -> pBoxPlot_CGI_quant
pBoxPlot_CGI_quantI think the fact that the spike-in normalisation is was done with 2 different batches might explain with the KO rescues are lower than the KO.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_rescue_lines.pdf"),
plot = pBoxPlot_CGI_quant, device = cairo_pdf, units = "cm",
width = 7.5, height = 5)Combine up and down flanking regions into facet
CGI_coloured_strip <-
strip_themed(background_x = elem_list_rect(fill = c(
'#A2234C', '#203059', '#72817F')),
text_x = elem_list_text(colour = c('white')))
strip_CGI_conversion <- c(
`Targeted CGI` = "Targeted CGI",
`Flanking region` = "Flanking",
`Rest of the genome` = "Rest of genome"
)Reshape data
samples_batch1 <- c('WT', 'dex4', 'CSex4', 'KO')
samples_batch2 <- c('WT2', 'KOrL4', 'KOrL6', 'KOrS1', 'KOrS3', 'KOrL3D2', 'KOrS3D2')
rbind(endogenous_bins, rescue_bins) |>
subset(! Sample %in% c('KOrL6', 'KOrS3') ) |>
mutate(type3 = type2) |>
mutate(type3 = case_when(type3 == 'PcG targeted CGI - 1' ~ 'Flanking region',
type3 == 'PcG targeted CGI + 1' ~ 'Flanking region',
type3 == 'PcG targeted CGI' ~ 'Targeted CGI',
type3 == 'Rest of the genome' ~ 'Rest of the genome') ) |>
mutate(type3 = factor(type3, levels = c('Targeted CGI', 'Flanking region', 'Rest of the genome'))) |>
mutate(batch = case_when(Sample %in% samples_batch1 ~ 'Batch1',
Sample %in% samples_batch2 ~ 'Batch2')
) -> combinedPlot figure 5F final.
ggplot(combined) +
aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
facet_wrap2(~batch+type3, nrow = 1, scales = 'free_x', strip = CGI_coloured_strip,
labeller = labeller(type3 = strip_CGI_conversion ) ) +
geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
scale_x_discrete(labels = c('WT' = 'WT#1', 'WT2' = 'WT#2',
'dex4' = '∆ex4', 'KOrL4' = 'KO+L',
'KOrS1' = 'KO+S', 'KOrL3D2' = 'KO+L-3D',
'KOrS3D2' = 'KO+S-3D') ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
scale_fill_manual(values = c('WT' = "#377AA3", 'WT2' = "#377AA3", 'CSex4' = "#B65120",
'dex4' = "#F7CB48",'KO' = "#728189",
"KOrL4" = "mediumpurple2", "KOrS1" = "darkorange1",
"KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC") ) +
labs(y = expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
boxplot_th -> pBoxPlot_CGI_quant_3facets
pBoxPlot_CGI_quant_3facetsSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_rescue_lines_3facets.pdf"),
plot = pBoxPlot_CGI_quant_3facets, device = cairo_pdf, units = "cm",
width = 11.0, height = 5)Split the 2 batches of ChIP-seq in one main and one supplementary figure.
subset(combined, batch == 'Batch1') |>
ggplot() +
aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
facet_wrap2( ~type3, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip,
labeller = labeller(type3 = strip_CGI_conversion ) ) +
geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
scale_x_discrete(labels = c('dex4' = '∆ex4') ) +
scale_y_continuous(expand = expansion(mult = 0, add = c(0.2, 0.2) ) ) +
coord_cartesian(ylim = c(0, 3.5), clip = 'on') +
scale_fill_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120",
'dex4' = "#F7CB48",'KO' = "#728189") ) +
labs(y = expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
boxplot_th -> pBoxPlot_CGI_quant_3facets_batch1
pBoxPlot_CGI_quant_3facets_batch1Save to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5F_2.5kb_Bins_Boxplot_endogenous_lines_3facets_batch1.pdf"),
plot = pBoxPlot_CGI_quant_3facets_batch1, device = cairo_pdf, units = "cm",
width = 5.5, height = 5)Batch 2
subset(combined, batch == 'Batch2') |>
ggplot() +
aes(x = Sample, y = log10_H3K27me3, fill = Sample) +
facet_wrap2(~type3, nrow = 1, scales = 'fixed', strip = CGI_coloured_strip,
labeller = labeller(type3 = strip_CGI_conversion ) ) +
geom_boxplot(outlier.shape = NA, linewidth = 0.2, show.legend = F) +
scale_x_discrete(labels = c('WT2' = 'WT', 'KOrL4' = 'KO+L', 'KOrS1' = 'KO+S',
'KOrL3D2' = 'KO+L-3D', 'KOrS3D2' = 'KO+S-3D') ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.02), add = 0) ) +
coord_cartesian(ylim = c(0, 2.5), clip = 'on') +
scale_fill_manual(values = c('WT2' = "#377AA3",
"KOrL4" = "mediumpurple2", "KOrS1" = "darkorange1",
"KOrL3D2" = "#74A57F", "KOrS3D2" = "#E8B4BC") ) +
labs(y = expression("Spike-in normalized H3K27me3 signal (" ~ log[10] ~ ")" ) ) +
boxplot_th -> pBoxPlot_CGI_quant_3facets_batch2
pBoxPlot_CGI_quant_3facets_batch2ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5H_2.5kb_Bins_Boxplot_rescue_lines_3facets_batch2.pdf"),
plot = pBoxPlot_CGI_quant_3facets_batch2, device = cairo_pdf, units = "cm",
width = 5.5, height = 5)CGI = CpG islands.
Targted = SUZ12 peak was found to overlap the CGI.
Non-targeted = SUZ12 didn’t bind that CGI.
Import bed files for the boxplots. Here every sample is identified by a number.
# b <- 1
region <- c()
sample <- c()
beds <- list()
for (b in 1:length(bed_paths)) {
# define region based on letter
if ( grepl(pattern = "^[1-4]A", x = bed_paths[b] ) ) { region[b] <- "CGI_H3K27me3_genes" }
else if (grepl(pattern = "^[1-4]B", x = bed_paths[b] ) ) { region[b] <- "CGI_nonPcG_genes" }
# define sample based on number
if ( grepl(pattern = "^1", x = bed_paths[b]) ) { sample[b] <- "WT" }
else if (grepl(pattern = "^2", x = bed_paths[b] ) ) { sample[b] <- "∆ex4" }
else if (grepl(pattern = "^3", x = bed_paths[b] ) ) { sample[b] <- "KO" }
else if (grepl(pattern = "^4", x = bed_paths[b] ) ) { sample[b] <- "CSex4" }
message("Importing sample: ", sample[b], ", region: ", region[b])
# read in each bed file and annotate it
read_delim(file = file.path(round17_meta_dir, bed_paths[b]), delim = "\t",
col_names = c('chr', 'start', 'end', 'min', 'average', 'max', 'sum'),
show_col_types = FALSE) |>
mutate(Sample = sample[b], Region = region[b]) |>
group_by(Sample, Region) |>
mutate(Numbered_region = n() ) |>
ungroup() -> beds[[b]]
names(beds)[b] <- paste0(sample[b], "_", region[b])
}Importing sample: WT, region: CGI_H3K27me3_genes
Importing sample: WT, region: CGI_nonPcG_genes
Importing sample: ∆ex4, region: CGI_H3K27me3_genes
Importing sample: ∆ex4, region: CGI_nonPcG_genes
Importing sample: KO, region: CGI_H3K27me3_genes
Importing sample: KO, region: CGI_nonPcG_genes
Importing sample: CSex4, region: CGI_H3K27me3_genes
Importing sample: CSex4, region: CGI_nonPcG_genes
Merge list of dataframes (bed files) into one.
all_beds <- do.call('rbind', beds) |>
mutate(Sample = factor(Sample, levels = c("WT", "CSex4", "∆ex4", "KO")) ) |>
mutate(Pretty_Region = case_when(Region == "CGI_H3K27me3_genes" ~ "PRC2 targeted CGI",
Region == "CGI_nonPcG_genes" ~ "Other CGI") ) |>
mutate(Pretty_Region = paste0(Pretty_Region, " (n = ", Numbered_region, ")" ) ) |>
mutate(Pretty_Region = factor(Pretty_Region,
levels = c("PRC2 targeted CGI (n = 2665)",
"Other CGI (n = 9999)") ) )Set colours for facets strip text
Metaplot_coloured_strip <- strip_themed(background_x = elem_list_rect(fill = c('#A2234C', '#003e1f') ),
text_x = elem_list_text(colour = c('white') ) )
strip_CGI_conversion <- c(
`PRC2 targeted CGI (n = 2665)` = "SUZ12 Targeted CGI (n = 2665)",
`Other CGI (n = 9999)` = "Non-targeted CGI (n = 9999)"
)Plot H3K27me3 signal in the CGI targeted regions used for the meta genes later on as a boxplot.
ggplot(all_beds) +
aes(x = Sample, y = log(average + 1, base = 10), fill = Sample) +
facet_wrap2( ~Pretty_Region, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_boxplot(outlier.shape = NA, show.legend = F, linewidth = 0.2) +
scale_fill_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120",
'∆ex4' = "#F7CB48",'KO' = "#728189") ) +
labs(y = expression("Spike-in norm. average H3K27me3 signal (" ~ log[10] ~ " + 1)" ) ) +
boxplot_th -> p_metagenes_regions_signal_boxplot
p_metagenes_regions_signal_boxplotSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Metagenes_regions_H3K27me3_signal_Boxplot.pdf"),
plot = p_metagenes_regions_signal_boxplot, device = cairo_pdf, units = "cm",
width = 7.3, height = 5)Import profiles stored in text files.
First import the files Ivano made for the targeted CGI.
CGI_targeted_K27me3_WT <- read_delim(
file = file.path(CGI_Dir, "14_MetaCGIplots/7A_GENEplot_5000/GENEprofile_7A_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "PRC2 targeted CGI", Sample = "WT")
CGI_targeted_K27me3_dex4 <- read_delim(
file = file.path(CGI_Dir, "14_MetaCGIplots/8A_GENEplot_5000/GENEprofile_8A_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "PRC2 targeted CGI", Sample = "dex4")
CGI_targeted_K27me3_KO <- read_delim(
file = file.path(CGI_Dir, "14_MetaCGIplots/9A_GENEplot_5000/GENEprofile_9A_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "PRC2 targeted CGI", Sample = "KO")
CGI_targeted_K27me3_CSex4 <- read_delim(
file = file.path(CGI_Dir, "16_MetaCGIplotsEx4100/E3_GENEplot_5000/GENEprofile_E3_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "PRC2 targeted CGI", Sample = "CSex4")
CGI_targeted_K27me3 <- rbind(CGI_targeted_K27me3_WT, CGI_targeted_K27me3_dex4, CGI_targeted_K27me3_KO, CGI_targeted_K27me3_CSex4) |>
as_tibble()Then import the non-targeted CGI.
CGI_other_K27me3_WT <- read_delim(
file = file.path(CGI_Dir, "14_MetaCGIplots/7B_GENEplot_5000/GENEprofile_7B_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "Other CGI", Sample = "WT")
CGI_other_K27me3_dex4 <- read_delim(
file = file.path(CGI_Dir, "14_MetaCGIplots/8B_GENEplot_5000/GENEprofile_8B_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "Other CGI", Sample = "dex4")
CGI_other_K27me3_KO <- read_delim(
file = file.path(CGI_Dir, "14_MetaCGIplots/9B_GENEplot_5000/GENEprofile_9B_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "Other CGI", Sample = "KO")
CGI_other_K27me3_CSex4 <- read_delim(
file = file.path(CGI_Dir,"16_MetaCGIplotsEx4100/E1_GENEplot_5000/GENEprofile_E1_5000.txt"),
delim = '\t', show_col_types = FALSE ) |>
setNames(c("MetaPosition", "K27me3_signal") ) |>
mutate(type = "Other CGI", Sample = "CSex4")
CGI_other_K27me3 <- rbind(CGI_other_K27me3_WT, CGI_other_K27me3_dex4, CGI_other_K27me3_KO, CGI_other_K27me3_CSex4) |>
as_tibble()Combine data and reshape it for plotting.
Meta_K27me3 <- rbind(CGI_targeted_K27me3, CGI_other_K27me3) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Sample, type) |>
arrange(MetaPosition) |>
ungroup() |>
mutate(Sample = factor(Sample, levels = c('WT', 'CSex4', 'dex4', 'KO') ) ) |>
mutate(MetaPosition = MetaPosition / 1000) # turn bases in kilo-basesRename strip for the figure
strip_CGI_conversion <- c(
`PRC2 targeted CGI` = "SUZ12 targeted CGI",
`Other CGI` = "Non-targeted CGI"
)Since the data contains every single nucleotide signal, I use only 1/4 of the data to plot as this does not affect the trend, but makes the pdf file easier to work with. This is done by using sample_frac(size = 0.25).
Meta_K27me3 |>
sample_frac(size = 0.25) |>
ggplot() +
aes(x = MetaPosition, y = K27me3_signal, colour = Sample) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = 0.025, add = 0 ) ) +
scale_colour_manual(values = c('WT' = "#377AA3", 'CSex4' = "#B65120",
'dex4' = "#F7CB48",'KO' = "#728189"),
labels = c('dex4' = '∆ex4') ) +
# coord_cartesian(ylim = c(0, NA), clip = 'off' ) +
labs(x = 'Distance to CGI center (kb)',
y = expression("Spike-in normalised H3K27me3 signal (" ~ log[10] ~ ")" )) +
profile_th -> pMetaPlot_CGI
pMetaPlot_CGISave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5G_Targeted_CGI_Metagene_Profiles_Endogenous_Lines.pdf"),
plot = pMetaPlot_CGI, device = cairo_pdf, units = "cm",
width = 7.3, height = 4.9)Metagene profile spike-in normalised.
Import the files Enrique made for the targeted CGI for the MTF2 ChIP-seq.
trgtd_K27me3_WT1 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M1A_GENEplot_5000/GENEprofile_M1A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "WT1")
trgtd_K27me3_WT2 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M2A_GENEplot_5000/GENEprofile_M2A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "WT2")
trgtd_K27me3_L1 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M5A_GENEplot_5000/GENEprofile_M5A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "KOrL1")
trgtd_K27me3_L2 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M6A_GENEplot_5000/GENEprofile_M6A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "KOrL2")
trgtd_K27me3_S1 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M3A_GENEplot_5000/GENEprofile_M3A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "KOrS1")
trgtd_K27me3_S2 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M4A_GENEplot_5000/GENEprofile_M4A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "KOrS2")
trgtd_K27me3_L3D <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "31A_GENEplot_5000/GENEprofile_31A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "KOrL3D1")
trgtd_K27me3_S3D <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "32A_GENEplot_5000/GENEprofile_32A_5000.txt",
epitope = "H3K27me3", geneset_type = "PRC2 targeted CGI",
sample_name = "KOrS3D1")
CGI_targeted_K27me3 <- rbind(trgtd_K27me3_WT1, trgtd_K27me3_WT2,
trgtd_K27me3_L1, trgtd_K27me3_L2,
trgtd_K27me3_S1, trgtd_K27me3_S2,
trgtd_K27me3_L3D, trgtd_K27me3_S3D) |>
as_tibble()Import rescues signal in not targeted CGIs.
nontrgtd_K27me3_WT1 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M1B_GENEplot_5000/GENEprofile_M1B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "WT1")
nontrgtd_K27me3_WT2 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M2B_GENEplot_5000/GENEprofile_M2B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "WT2")
nontrgtd_K27me3_L1 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M5B_GENEplot_5000/GENEprofile_M5B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "KOrL1")
nontrgtd_K27me3_L2 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M6B_GENEplot_5000/GENEprofile_M6B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "KOrL2")
nontrgtd_K27me3_S1 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M3B_GENEplot_5000/GENEprofile_M3B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "KOrS1")
nontrgtd_K27me3_S2 <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "M4B_GENEplot_5000/GENEprofile_M4B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "KOrS2")
nontrgtd_K27me3_L3D <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "31B_GENEplot_5000/GENEprofile_31B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "KOrL3D1")
nontrgtd_K27me3_S3D <- read_prfl(path_dir = round23_metagene_dir,
txt_name = "32B_GENEplot_5000/GENEprofile_32B_5000.txt",
epitope = "H3K27me3", geneset_type = "Other CGI",
sample_name = "KOrS3D1")
CGI_other_K27me3 <- rbind(nontrgtd_K27me3_WT1, nontrgtd_K27me3_WT2,
nontrgtd_K27me3_L1, nontrgtd_K27me3_L2,
nontrgtd_K27me3_S1, nontrgtd_K27me3_S2,
nontrgtd_K27me3_L3D, nontrgtd_K27me3_S3D) |>
as_tibble()Combine data and reshape it for plotting.
Meta_K27me3 <- rbind(CGI_targeted_K27me3, CGI_other_K27me3) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |> # remove last number (i.e. replicate)
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Genotype, type, MetaPosition) |>
mutate(Average_Signal = mean(H3K27me3_signal, na.rm = T) )|>
arrange(MetaPosition) |>
ungroup() |>
mutate(Genotype = str_replace(pattern = 'r', replacement = '\\+', Genotype)) |>
mutate(Genotype = factor(Genotype, levels = rev(c('WT', 'KO+L', 'KO+S', 'KO+L3D', 'KO+S3D')) ) ) |>
mutate(Sample = factor(Sample,
levels = c('WT1', 'WT2', 'KOrL1', 'KOrL2', 'KOrS1', 'KOrS2', 'KOrL3D1', 'KOrS3D1'))) |>
mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
select(MetaPosition, type, Genotype, Average_Signal) |>
unique() |> sample_frac(size = 0.25) Set colour palettes and plot
genotype_palette <- c('WT' = "#377AA3", 'KO+L' = "mediumpurple2",
'KO+S' = "darkorange1", 'KO+L3D' = "#74A57F",
'KO+S3D'= "#E8B4BC")
ggplot(Meta_K27me3) +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = 0.1),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.025), add = 0 ) ) +
scale_colour_manual(values = genotype_palette,
guide = guide_legend(reverse = TRUE) ) +
coord_cartesian(ylim = c(0, NA), clip = 'off' ) +
labs(x = 'Distance to CGI center (kb)',
y = 'Spike-in normalised H3K27me3 signal') +
profile_th -> pMetaPlot_CGI_H3K27me3_Rescues
pMetaPlot_CGI_H3K27me3_RescuesSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5I_H3K27me3_Targeted_CGI_Metagene_Profiles_Rescue_Lines.pdf"),
plot = pMetaPlot_CGI_H3K27me3_Rescues, device = cairo_pdf, units = "cm",
width = 7.3, height = 4.7)MTF2 is a component of the PRC2.1 complex.
Metagene profile spike-in normalised.
Import the files Enrique made for the targeted CGI for the MTF2 ChIP-seq.
trgtd_MTF2_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M1A_GENEplot_5000/GENEprofile_M1A_5000.txt",
epitope = "MTF2", geneset_type = "PRC2 targeted CGI",
sample_name = "WT1")
trgtd_MTF2_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M2A_GENEplot_5000/GENEprofile_M2A_5000.txt",
epitope = "MTF2", geneset_type = "PRC2 targeted CGI",
sample_name = "WT2")
trgtd_MTF2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M3A_GENEplot_5000/GENEprofile_M3A_5000.txt",
epitope = "MTF2", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4a")
trgtd_MTF2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M4A_GENEplot_5000/GENEprofile_M4A_5000.txt",
epitope = "MTF2", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4b")
trgtd_MTF2_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M5A_GENEplot_5000/GENEprofile_M5A_5000.txt",
epitope = "MTF2", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4a")
trgtd_MTF2_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M6A_GENEplot_5000/GENEprofile_M6A_5000.txt",
epitope = "MTF2", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4b")
CGI_targeted_MTF2 <- rbind(trgtd_MTF2_WT1, trgtd_MTF2_WT2,
trgtd_MTF2_CSex4a, trgtd_MTF2_CSex4b,
trgtd_MTF2_dex4a, trgtd_MTF2_dex4b) |>
as_tibble()Import MTF2 signal at non-targeted CGI.
nontrgtd_MTF2_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M1B_GENEplot_5000/GENEprofile_M1B_5000.txt",
epitope = "MTF2", geneset_type = "Other CGI",
sample_name = "WT1")
nontrgtd_MTF2_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M2B_GENEplot_5000/GENEprofile_M2B_5000.txt",
epitope = "MTF2", geneset_type = "Other CGI",
sample_name = "WT2")
nontrgtd_MTF2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M3B_GENEplot_5000/GENEprofile_M3B_5000.txt",
epitope = "MTF2", geneset_type = "Other CGI",
sample_name = "CSex4a")
nontrgtd_MTF2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M4B_GENEplot_5000/GENEprofile_M4B_5000.txt",
epitope = "MTF2", geneset_type = "Other CGI",
sample_name = "CSex4b")
nontrgtd_MTF2_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M5B_GENEplot_5000/GENEprofile_M5B_5000.txt",
epitope = "MTF2", geneset_type = "Other CGI",
sample_name = "∆ex4a")
nontrgtd_MTF2_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "M6B_GENEplot_5000/GENEprofile_M6B_5000.txt",
epitope = "MTF2", geneset_type = "Other CGI",
sample_name = "∆ex4b")
CGI_other_MTF2 <- rbind(nontrgtd_MTF2_WT1, nontrgtd_MTF2_WT2,
nontrgtd_MTF2_CSex4a, nontrgtd_MTF2_CSex4b,
nontrgtd_MTF2_dex4a, nontrgtd_MTF2_dex4b) |>
as_tibble()Combine data and reshape it for plotting. Since the data contains every single nucleotide signal, I use only 1/4 of the data to plot as this does not affect the trend, but makes the pdf file easier to work with. This is done by using sample_frac(size = 0.25).
Meta_MTF2 <- rbind(CGI_targeted_MTF2, CGI_other_MTF2) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Genotype, type, MetaPosition) |>
mutate(Average_Signal = mean(MTF2_signal, na.rm = T) )|>
arrange(MetaPosition) |>
ungroup() |>
mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
select(MetaPosition, type, Genotype, Average_Signal) |>
unique() |> sample_frac(size = 0.25) Set colour palettes
samples_palette <- c('WT1' = "#377AA3", 'WT2' = "#377AA3",
'CSex4a' = "#B65120", 'CSex4b' = "#B65120",
'∆ex4a' = "#F7CB48", '∆ex4b' = "#F7CB48")
genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48")Plot MTF2 profile
ggplot(Meta_MTF2) +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = 0.1),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.025), add = 0 ) ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0, NA), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'Spike-in normalised MTF2 signal') +
profile_th -> pMetaPlot_CGI_MTF2
pMetaPlot_CGI_MTF2Save to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5I_MTF2_Metagene_Profiles.pdf"),
plot = pMetaPlot_CGI_MTF2, device = cairo_pdf, units = "cm",
width = 7.3, height = 4.9)Save for main figure.
Meta_MTF2$Epitope <- 'MTF2'
Meta_MTF2 |>
subset(type == 'PRC2 targeted CGI') |>
ggplot() +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~Epitope) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 5 ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0, NA), xlim = c(-10, 10), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'Spike-in normalised signal at SUZ12 targeted CGI') +
profile_th +
theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_MTF2
pMetaPlot_MTF2ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5I_MTF2_Targeted_CGI_Metagene_Profiles.pdf"),
plot = pMetaPlot_MTF2, device = cairo_pdf, units = "cm",
width = 4, height = 4.9)Metagene profile of spike-in normalised JARID2 ChIP-seq.
trgtd_JARID2_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J1A_GENEplot_5000/GENEprofile_J1A_5000.txt",
epitope = "JARID2", geneset_type = "PRC2 targeted CGI",
sample_name = "WT1")
trgtd_JARID2_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J2A_GENEplot_5000/GENEprofile_J2A_5000.txt",
epitope = "JARID2", geneset_type = "PRC2 targeted CGI",
sample_name = "WT2")
trgtd_JARID2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J3A_GENEplot_5000/GENEprofile_J3A_5000.txt",
epitope = "JARID2", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4a")
trgtd_JARID2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J4A_GENEplot_5000/GENEprofile_J4A_5000.txt",
epitope = "JARID2", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4b")
trgtd_JARID2_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J5A_GENEplot_5000/GENEprofile_J5A_5000.txt",
epitope = "JARID2", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4a")
trgtd_JARID2_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J6A_GENEplot_5000/GENEprofile_J6A_5000.txt",
epitope = "JARID2", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4b")
CGI_targeted_JARID2 <- rbind(trgtd_JARID2_WT1, trgtd_JARID2_WT2,
trgtd_JARID2_CSex4a, trgtd_JARID2_CSex4b,
trgtd_JARID2_dex4a, trgtd_JARID2_dex4b) |>
as_tibble()Import JARID2 signal at non-targeted CGI.
nontrgtd_JARID2_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J1B_GENEplot_5000/GENEprofile_J1B_5000.txt",
epitope = "JARID2", geneset_type = "Other CGI",
sample_name = "WT1")
nontrgtd_JARID2_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J2B_GENEplot_5000/GENEprofile_J2B_5000.txt",
epitope = "JARID2", geneset_type = "Other CGI",
sample_name = "WT2")
nontrgtd_JARID2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J3B_GENEplot_5000/GENEprofile_J3B_5000.txt",
epitope = "JARID2", geneset_type = "Other CGI",
sample_name = "CSex4a")
nontrgtd_JARID2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J4B_GENEplot_5000/GENEprofile_J4B_5000.txt",
epitope = "JARID2", geneset_type = "Other CGI",
sample_name = "CSex4b")
nontrgtd_JARID2_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J5B_GENEplot_5000/GENEprofile_J5B_5000.txt",
epitope = "JARID2", geneset_type = "Other CGI",
sample_name = "∆ex4a")
nontrgtd_JARID2_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "J6B_GENEplot_5000/GENEprofile_J6B_5000.txt",
epitope = "JARID2", geneset_type = "Other CGI",
sample_name = "∆ex4b")
CGI_other_JARID2 <- rbind(nontrgtd_JARID2_WT1, nontrgtd_JARID2_WT2,
nontrgtd_JARID2_CSex4a, nontrgtd_JARID2_CSex4b,
nontrgtd_JARID2_dex4a, nontrgtd_JARID2_dex4b) |>
as_tibble()Combine data and reshape it for plotting.
Meta_JARID2 <- rbind(CGI_targeted_JARID2, CGI_other_JARID2) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Genotype, type, MetaPosition) |>
mutate(Average_Signal = mean(JARID2_signal, na.rm = T) )|>
arrange(MetaPosition) |>
ungroup() |>
mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
select(MetaPosition, type, Genotype, Average_Signal) |>
unique() |> sample_frac(size = 0.25) Plot JARID2 profile.
ggplot(Meta_JARID2) +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.025), add = 0 ) ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0, NA), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'Spike-in normalised JARID2 signal') +
profile_th -> pMetaPlot_CGI_JARID2
pMetaPlot_CGI_JARID2Save to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5H_JARID2_Metagene_Profiles.pdf"),
plot = pMetaPlot_CGI_JARID2, device = cairo_pdf, units = "cm",
width = 7.3, height = 4.9)Save for main figure.
Meta_JARID2$Epitope <- 'JARID2'
Meta_JARID2 |>
subset(type == 'PRC2 targeted CGI') |>
ggplot() +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~Epitope) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 5 ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0, NA), xlim = c(-10, 10),clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'Spike-in normalised signal at SUZ12 targeted CGI') +
profile_th +
theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_JARID2
pMetaPlot_JARID2ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5H_JARID2_Targeted_CGI_Metagene_Profiles.pdf"),
plot = pMetaPlot_JARID2, device = cairo_pdf, units = "cm",
width = 4, height = 4.9)Import AEBP2 targeted CGI signal not spike-in normalised for metagene profile
trgtd_AEBP2_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA1A_GENEplot_5000/GENEprofile_nA1A_5000.txt",
epitope = "AEBP2", geneset_type = "PRC2 targeted CGI",
sample_name = "WT1")
trgtd_AEBP2_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA2A_GENEplot_5000/GENEprofile_nA2A_5000.txt",
epitope = "AEBP2", geneset_type = "PRC2 targeted CGI",
sample_name = "WT2")
trgtd_AEBP2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA3A_GENEplot_5000/GENEprofile_nA3A_5000.txt",
epitope = "AEBP2", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4a")
trgtd_AEBP2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA4A_GENEplot_5000/GENEprofile_nA4A_5000.txt",
epitope = "AEBP2", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4b")
trgtd_AEBP2_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA5A_GENEplot_5000/GENEprofile_nA5A_5000.txt",
epitope = "AEBP2", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4a")
trgtd_AEBP2_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA6A_GENEplot_5000/GENEprofile_nA6A_5000.txt",
epitope = "AEBP2", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4b")
CGI_targeted_AEBP2 <- rbind(trgtd_AEBP2_WT1, trgtd_AEBP2_WT2,
trgtd_AEBP2_CSex4a, trgtd_AEBP2_CSex4b,
trgtd_AEBP2_dex4a, trgtd_AEBP2_dex4b) |>
as_tibble()Import AEBP2 signal at non-targeted CGI.
nontrgtd_AEBP2_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA1B_GENEplot_5000/GENEprofile_nA1B_5000.txt",
epitope = "AEBP2", geneset_type = "Other CGI",
sample_name = "WT1")
nontrgtd_AEBP2_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA2B_GENEplot_5000/GENEprofile_nA2B_5000.txt",
epitope = "AEBP2", geneset_type = "Other CGI",
sample_name = "WT2")
nontrgtd_AEBP2_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA3B_GENEplot_5000/GENEprofile_nA3B_5000.txt",
epitope = "AEBP2", geneset_type = "Other CGI",
sample_name = "CSex4a")
nontrgtd_AEBP2_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA4B_GENEplot_5000/GENEprofile_nA4B_5000.txt",
epitope = "AEBP2", geneset_type = "Other CGI",
sample_name = "CSex4b")
nontrgtd_AEBP2_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA5B_GENEplot_5000/GENEprofile_nA5B_5000.txt",
epitope = "AEBP2", geneset_type = "Other CGI",
sample_name = "∆ex4a")
nontrgtd_AEBP2_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nA6B_GENEplot_5000/GENEprofile_nA6B_5000.txt",
epitope = "AEBP2", geneset_type = "Other CGI",
sample_name = "∆ex4b")
CGI_other_AEBP2 <- rbind(nontrgtd_AEBP2_WT1, nontrgtd_AEBP2_WT2,
nontrgtd_AEBP2_CSex4a, nontrgtd_AEBP2_CSex4b,
nontrgtd_AEBP2_dex4a, nontrgtd_AEBP2_dex4b) |>
as_tibble()Combine data and reshape it for plotting.
Meta_AEBP2 <- rbind(CGI_targeted_AEBP2, CGI_other_AEBP2) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Genotype, type, MetaPosition) |>
mutate(Average_Signal = mean(AEBP2_signal, na.rm = T) )|>
arrange(MetaPosition) |>
ungroup() |>
mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
select(MetaPosition, type, Genotype, Average_Signal) |>
unique() |> sample_frac(size = 0.25) Plot AEBP2 profile
genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48")
ggplot(Meta_AEBP2) +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 3 ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0.05, 0.15), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'AEBP2 ChIP-seq signal') +
profile_th -> pMetaPlot_CGI_AEBP2
pMetaPlot_CGI_AEBP2Save to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5J_AEBP2_Metagene_Profiles.pdf"),
plot = pMetaPlot_CGI_AEBP2, device = cairo_pdf, units = "cm",
width = 6.7, height = 4.7)Plot for supplementary figure
Meta_AEBP2$Epitope <- 'AEBP2'
Meta_AEBP2 |>
subset(type == 'PRC2 targeted CGI') |>
ggplot() +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~Epitope) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 3 ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0.05, 0.15), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'ChIP-seq signal at SUZ12 targeted CGI') +
profile_th +
theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_AEBP2
pMetaPlot_AEBP2ggsave(path = pdf_dir_fig5,
filename = paste0("FigS5J_AEBP2_Targeted_CGI_Metagene_Profiles.pdf"),
plot = pMetaPlot_AEBP2, device = cairo_pdf, units = "cm",
width = 4, height = 4.9)Import RING1B targeted CGI signal not spike-in normalised for metagene profile
trgtd_RING1B_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR1A_GENEplot_5000/GENEprofile_nR1A_5000.txt",
epitope = "RING1B", geneset_type = "PRC2 targeted CGI",
sample_name = "WT1")
trgtd_RING1B_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR2A_GENEplot_5000/GENEprofile_nR2A_5000.txt",
epitope = "RING1B", geneset_type = "PRC2 targeted CGI",
sample_name = "WT2")
trgtd_RING1B_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR3A_GENEplot_5000/GENEprofile_nR3A_5000.txt",
epitope = "RING1B", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4a")
trgtd_RING1B_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR4A_GENEplot_5000/GENEprofile_nR4A_5000.txt",
epitope = "RING1B", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4b")
trgtd_RING1B_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR5A_GENEplot_5000/GENEprofile_nR5A_5000.txt",
epitope = "RING1B", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4a")
trgtd_RING1B_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR6A_GENEplot_5000/GENEprofile_nR6A_5000.txt",
epitope = "RING1B", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4b")
CGI_targeted_RING1B <- rbind(trgtd_RING1B_WT1, trgtd_RING1B_WT2,
trgtd_RING1B_CSex4a, trgtd_RING1B_CSex4b,
trgtd_RING1B_dex4a, trgtd_RING1B_dex4b) |>
as_tibble()Import RING1B signal at non-targeted CGI.
nontrgtd_RING1B_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR1B_GENEplot_5000/GENEprofile_nR1B_5000.txt",
epitope = "RING1B", geneset_type = "Other CGI",
sample_name = "WT1")
nontrgtd_RING1B_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR2B_GENEplot_5000/GENEprofile_nR2B_5000.txt",
epitope = "RING1B", geneset_type = "Other CGI",
sample_name = "WT2")
nontrgtd_RING1B_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR3B_GENEplot_5000/GENEprofile_nR3B_5000.txt",
epitope = "RING1B", geneset_type = "Other CGI",
sample_name = "CSex4a")
nontrgtd_RING1B_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR4B_GENEplot_5000/GENEprofile_nR4B_5000.txt",
epitope = "RING1B", geneset_type = "Other CGI",
sample_name = "CSex4b")
nontrgtd_RING1B_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR5B_GENEplot_5000/GENEprofile_nR5B_5000.txt",
epitope = "RING1B", geneset_type = "Other CGI",
sample_name = "∆ex4a")
nontrgtd_RING1B_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nR6B_GENEplot_5000/GENEprofile_nR6B_5000.txt",
epitope = "RING1B", geneset_type = "Other CGI",
sample_name = "∆ex4b")
CGI_other_RING1B <- rbind(nontrgtd_RING1B_WT1, nontrgtd_RING1B_WT2,
nontrgtd_RING1B_CSex4a, nontrgtd_RING1B_CSex4b,
nontrgtd_RING1B_dex4a, nontrgtd_RING1B_dex4b) |>
as_tibble()Combine data and reshape it for plotting.
Meta_RING1B <- rbind(CGI_targeted_RING1B, CGI_other_RING1B) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Genotype, type, MetaPosition) |>
mutate(Average_Signal = mean(RING1B_signal, na.rm = T) )|>
arrange(MetaPosition) |>
ungroup() |>
mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
select(MetaPosition, type, Genotype, Average_Signal) |>
unique() |> sample_frac(size = 0.25) Plot RING1B profile
ggplot(Meta_RING1B) +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ) ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0.06, NA), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'RING1B ChIP-seq signal') +
profile_th -> pMetaPlot_CGI_RING1B
pMetaPlot_CGI_RING1BSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5J_RING1B_Metagene_Profiles.pdf"),
plot = pMetaPlot_CGI_RING1B, device = cairo_pdf, units = "cm",
width = 7.3, height = 4.9)Plot for main figure
Meta_RING1B$Epitope <- 'RING1B'
Meta_RING1B |>
subset(type == 'PRC2 targeted CGI') |>
ggplot() +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~Epitope) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 6 ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0.05, 0.32), xlim = c(-10, 10), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'ChIP-seq signal at SUZ12 targeted CGI') +
profile_th +
theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_RING1B
pMetaPlot_RING1Bggsave(path = pdf_dir_fig5,
filename = paste0("Fig5J_RING1B_Targeted_CGI_Metagene_Profiles.pdf"),
plot = pMetaPlot_RING1B, device = cairo_pdf, units = "cm",
width = 4, height = 4.9)Import H2Aub targeted CGI signal not spike-in normalised for metagene profile
trgtd_H2Aub_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU1A_GENEplot_5000/GENEprofile_nU1A_5000.txt",
epitope = "H2Aub", geneset_type = "PRC2 targeted CGI",
sample_name = "WT1")
trgtd_H2Aub_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU2A_GENEplot_5000/GENEprofile_nU2A_5000.txt",
epitope = "H2Aub", geneset_type = "PRC2 targeted CGI",
sample_name = "WT2")
trgtd_H2Aub_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU3A_GENEplot_5000/GENEprofile_nU3A_5000.txt",
epitope = "H2Aub", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4a")
trgtd_H2Aub_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU4A_GENEplot_5000/GENEprofile_nU4A_5000.txt",
epitope = "H2Aub", geneset_type = "PRC2 targeted CGI",
sample_name = "CSex4b")
trgtd_H2Aub_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU5A_GENEplot_5000/GENEprofile_nU5A_5000.txt",
epitope = "H2Aub", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4a")
trgtd_H2Aub_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU6A_GENEplot_5000/GENEprofile_nU6A_5000.txt",
epitope = "H2Aub", geneset_type = "PRC2 targeted CGI",
sample_name = "∆ex4b")
CGI_targeted_H2Aub <- rbind(trgtd_H2Aub_WT1, trgtd_H2Aub_WT2,
trgtd_H2Aub_CSex4a, trgtd_H2Aub_CSex4b,
trgtd_H2Aub_dex4a, trgtd_H2Aub_dex4b) |>
as_tibble()Import H2Aub signal at non-targeted CGI.
nontrgtd_H2Aub_WT1 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU1B_GENEplot_5000/GENEprofile_nU1B_5000.txt",
epitope = "H2Aub", geneset_type = "Other CGI",
sample_name = "WT1")
nontrgtd_H2Aub_WT2 <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU2B_GENEplot_5000/GENEprofile_nU2B_5000.txt",
epitope = "H2Aub", geneset_type = "Other CGI",
sample_name = "WT2")
nontrgtd_H2Aub_CSex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU3B_GENEplot_5000/GENEprofile_nU3B_5000.txt",
epitope = "H2Aub", geneset_type = "Other CGI",
sample_name = "CSex4a")
nontrgtd_H2Aub_CSex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU4B_GENEplot_5000/GENEprofile_nU4B_5000.txt",
epitope = "H2Aub", geneset_type = "Other CGI",
sample_name = "CSex4b")
nontrgtd_H2Aub_dex4a <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU5B_GENEplot_5000/GENEprofile_nU5B_5000.txt",
epitope = "H2Aub", geneset_type = "Other CGI",
sample_name = "∆ex4a")
nontrgtd_H2Aub_dex4b <- read_prfl(path_dir = round21_meta_dir,
txt_name = "nU6B_GENEplot_5000/GENEprofile_nU6B_5000.txt",
epitope = "H2Aub", geneset_type = "Other CGI",
sample_name = "∆ex4b")
CGI_other_H2Aub <- rbind(nontrgtd_H2Aub_WT1, nontrgtd_H2Aub_WT2,
nontrgtd_H2Aub_CSex4a, nontrgtd_H2Aub_CSex4b,
nontrgtd_H2Aub_dex4a, nontrgtd_H2Aub_dex4b) |>
as_tibble()Combine data and reshape it for plotting.
Meta_H2Aub <- rbind(CGI_targeted_H2Aub, CGI_other_H2Aub) |>
mutate(MetaPosition = MetaPosition - 5000) |>
mutate(Genotype = str_remove(pattern = "[a1-z9]$", string = Sample)) |>
mutate(type = factor(type, levels = c("PRC2 targeted CGI", "Other CGI") )) |>
group_by(Genotype, type, MetaPosition) |>
mutate(Average_Signal = mean(H2Aub_signal, na.rm = T) )|>
arrange(MetaPosition) |>
ungroup() |>
mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4') ) ) |>
mutate(Sample = factor(Sample, levels = c('WT1', 'WT2', 'CSex4a', 'CSex4b', '∆ex4a', '∆ex4b') ) ) |>
mutate(MetaPosition = MetaPosition / 1000) |> # turn bases in kilo-bases
# subset a small a fraction of the total.
select(MetaPosition, type, Genotype, Average_Signal) |>
unique() |> sample_frac(size = 0.25) Plot H2Aub profile
ggplot(Meta_H2Aub) +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~type, strip = Metaplot_coloured_strip,
labeller = labeller(type = strip_CGI_conversion) ) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ) ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0.025, NA), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'H2AK119ub ChIP-seq signal') +
profile_th -> pMetaPlot_CGI_H2Aub
pMetaPlot_CGI_H2AubSave to pdf.
ggsave(path = pdf_dir_fig5,
filename = paste0("Fig5K_H2AK119ub_Metagene_Profiles.pdf"),
plot = pMetaPlot_CGI_H2Aub, device = cairo_pdf, units = "cm",
width = 7.3, height = 4.9)Plot for main figure
Meta_H2Aub$Epitope <- 'H2AK119ub'
Meta_H2Aub |>
subset(type == 'PRC2 targeted CGI') |>
ggplot() +
aes(x = MetaPosition, y = Average_Signal, colour = Genotype) +
facet_wrap2( ~Epitope) +
geom_line(linewidth = 0.5) +
scale_x_continuous(expand = expansion(mult = 0, add = c(0, 0.1) ),
breaks = c(-10, -5, 0, 5, 10) ) +
scale_y_continuous(expand = expansion(mult = c(0, 0.05), add = 0 ), n.breaks = 6 ) +
scale_colour_manual(values = genotype_palette ) +
coord_cartesian(ylim = c(0.05, 0.32), clip = 'on') +
labs(x = 'Distance to CGI center (kb)',
y = 'ChIP-seq signal at SUZ12 targeted CGI') +
profile_th +
theme(legend.position = c(0.85, 0.87)) -> pMetaPlot_CGI_H2Aub
pMetaPlot_CGI_H2Aubggsave(path = pdf_dir_fig5,
filename = paste0("Fig5K_H2AK119ub_Targeted_CGI_Metagene_Profiles.pdf"),
plot = pMetaPlot_CGI_H2Aub, device = cairo_pdf, units = "cm",
width = 4, height = 4.9)Check the overlap of the genes that contain a peak of SUZ12 in their promoter.
CSex4 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_100.txt"))$V1
WT_1 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_WT1.txt"))$V1
WT_2 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_WT2.txt"))$V1
dEx4_1 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_KO1.txt"))$V1
dEx4_2 <- read.table(file.path(round3_suz12_peaks_dir, "Suz12_KO2.txt"))$V1
WT <- WT_1[WT_1%in%WT_2]
dEx4 <- dEx4_1[dEx4_1%in%dEx4_2]
message("Num of genes with SUZ12 peaks:\nWT#1:\t", length(WT_1), '\nWT#2:\t', length(WT_2),
'\nWT shared: ', length(WT),
'\n∆ex4#1:\t', length(dEx4_1), '\n∆ex4#2:\t', length(dEx4_2), '\n∆ex4 shared: ', length(dEx4),
'\nCSex4:\t', length(CSex4))Num of genes with SUZ12 peaks:
WT#1: 3316
WT#2: 3924
WT shared: 3203
∆ex4#1: 4846
∆ex4#2: 2803
∆ex4 shared: 2773
CSex4: 3446
Save to pdf the peaks overlap as venn diagram
draw.venn( WT,CSex4,dEx4,
title = "SUZ12 target overlap",
subtitle = "",
nrtype = "pct",
# t_f = "Arial",
xtitle = "WT", #xt_f = "Arial",
ytitle = "CSex4", #yt_f = "Arial",
ztitle = "∆ex4", #zt_f = "Arial",
x_c = "#377AA3", y_c = "#B65120", z_c = "#F7CB48",
#nr_f = "Arial",
nr_fb = 1,
output = 'pdf', width = 500, height = 500,
filename = file.path(pdf_dir_fig5, 'FigS5A_SUZ12_targets_VennDiagram.pdf') )Now do the same for H3K27me3
CSex4 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_100.txt"))$V1
WT_1 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_WT1.txt"))$V1
WT_2 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_WT2.txt"))$V1
dEx4_1 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_KO1.txt"))$V1
dEx4_2 <- read.table(file.path(round2_h3k27me3_peaks_dir, "H3K27me3_KO2.txt"))$V1
WT <- WT_1[WT_1%in%WT_2]
dEx4 <- dEx4_1[dEx4_1%in%dEx4_2]
message("Num of genes with H3K27me3 peaks:\nWT#1:\t", length(WT_1), '\nWT#2:\t', length(WT_2),
'\nWT shared: ', length(WT),
'\n∆ex4#1:\t', length(dEx4_1), '\n∆ex4#2:\t', length(dEx4_2), '\n∆ex4 shared: ', length(dEx4),
'\nCSex4:\t', length(CSex4))Num of genes with H3K27me3 peaks:
WT#1: 5039
WT#2: 5973
WT shared: 4861
∆ex4#1: 7214
∆ex4#2: 5470
∆ex4 shared: 4717
CSex4: 3623
Plot:
draw.venn(WT,CSex4,dEx4,
title = "H3K27me3 target overlap",
subtitle = "", nrtype = "pct",
xtitle = "WT", #xt_f = "Arial",
ytitle = "CSex4", #yt_f = "Arial",
ztitle = "∆ex4", #zt_f = "Arial",
x_c = "#377AA3", y_c = "#B65120", z_c = "#F7CB48",
#nr_f = "Arial",
nr_fb = 1,
output = 'pdf', width = 500, height = 500,
filename = file.path(pdf_dir_fig5, 'FigS5A_H3K27me3_targets_VennDiagram.pdf'))genotype_palette <- c('WT' = "#377AA3", 'CSex4' = "#B65120", '∆ex4' = "#F7CB48", 'KO' = "#728189")
ChIP_qPCR_dir <- file.path(oneDrive_Dir, '11_ChIP/ZChIP_test2')
qPCR_path <- list.files(ChIP_qPCR_dir, pattern = '210603_ZChIP_test2.xlsx', full.names = T)WT_samples <- c("WTp", "WT#2")
CSex4_samples <- c("CSex4")
dEx4_samples <- c("∆ex4#2", "∆ex4#3")
KO_samples <- c("KO#1", "KO#2")data <- read_excel(qPCR_path, sheet = "melt")
# fix old name
data <- data |>
mutate(Sample = ifelse(Sample == 'WT4C', yes = 'CSex4', no = Sample))
data <- data |>
mutate(Genotype = case_when(Sample %in% WT_samples ~ 'WT',
Sample %in% CSex4_samples ~ 'CSex4',
Sample %in% dEx4_samples ~ '∆ex4',
Sample %in% KO_samples ~ 'KO') ) |>
mutate(Genotype = factor(Genotype, levels = c('WT', 'CSex4', '∆ex4','KO') ) ) |>
mutate(Sample = factor(Sample, levels = unique(data$Sample) ) ) |>
mutate(Gene = factor(Gene, levels = c("Gsc","Hoxd13","Nes","T (Bra)","Actb","Sox2") ) )Calculate avg for barplot
avg <- data %>%
group_by(Gene, Sample, Genotype) %>%
summarise(value = mean(value), .groups = 'keep')Plot ChIP-qPCR
ggplot(data)+
aes(x=Sample, y=value, fill = Genotype) +
facet_grid(~Gene)+
geom_col(data = avg, show.legend = F, width = 0.95 )+
geom_point(size = 1, shape = 21, stroke = 0.2)+
scale_fill_manual(values = genotype_palette ) +
scale_x_discrete(labels = c('WT', '', 'CSex4', '', '∆ex4', '', 'KO')) +
scale_y_continuous(expand = expansion(mult = c(0, 0.02), add = 0)) +
labs(y = "% of input") +
theme_classic(base_size = 6, base_family = 'Arial') +
theme(legend.position = c(0.92, 0.85),
legend.background = element_blank(),
legend.title = element_blank(),
legend.key = element_blank(),
legend.key.size = unit(1, 'mm'),
legend.text = element_text(margin = margin(l = -1)),
panel.grid.major.y = element_line(linewidth = 0.2),
axis.text = element_text(colour = 'black'),
axis.text.x = element_text(angle = 45, hjust = 1, margin = margin(t = -1, unit = 'mm')),
axis.title = element_text(size = 5, colour = 'black'),
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(r = 0)),
axis.ticks.length = unit(x = 1, units = 'mm'),
axis.ticks.x = element_blank(),
axis.line = element_line(linewidth = 0.2),
strip.background = element_blank(),
panel.background = element_blank(),
plot.background = element_blank()) -> p_ChIP_qCPR
p_ChIP_qCPRggsave(filename = 'FigS5C_ChIP_qPCR_PRC2_targets.pdf',
path = pdf_dir_fig5, plot = p_ChIP_qCPR,
device = cairo_pdf, units = 'mm', width = 60, height = 50)Spie chart of the percentage distribution of H3K27me3 peaks in ∆ex4 ESCs. The radial increase corresponds to the fold-change relative to WT ESCs.
Code from Ivano’s script
# WT vs genome
x <- c(3100,2677,278)
y <- c(4661,3143,1248)
percentX <-x/sum(x)
percentY <-y/sum(y)
percentY <- (percentX/percentY)/sum(percentX/percentY)
names(percentX) <- c("Promoter","Genic","Intergenic")
names(percentY) <- names(percentX)
spie(percentY, percentX, multi = c(0.5, 1, 1.5), lwd = 1,
pie.labs = TRUE, grid = TRUE, grid.labs = TRUE, scale = TRUE, p1.circle = TRUE,
bg = c(NA, NA, NA),
col = c("thistle","palegreen","#728189") )Save to pdf.
cairo_pdf(file = file.path(pdf_dir_fig5, 'FigS5E_H3K27me3_peaks_location_SpieChart.pdf'),
width = 4, height = 4)
spie(percentY, percentX, multi = c(0.5, 1, 1.5), lwd = 1,
pie.labs = TRUE, grid = TRUE, grid.labs = TRUE, scale = TRUE, p1.circle = TRUE,
bg = c(NA, NA, NA),
col = c("thistle","palegreen","#728189") )
dev.off()quartz_off_screen
2
End of analysis.
sessioninfo::session_info()─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.1.2 (2021-11-01)
os macOS Catalina 10.15.7
system x86_64, darwin17.0
ui X11
language (EN)
collate C
ctype UTF-8
tz Europe/Madrid
date 2024-02-07
pandoc 2.10.1 @ /usr/local/bin/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-5 2016-07-21 [1] CRAN (R 4.1.0)
AnnotationDbi 1.56.2 2021-11-09 [1] Bioconductor
backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.0)
beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.1.0)
Biobase 2.54.0 2021-10-26 [1] Bioconductor
BiocFileCache 2.2.1 2022-01-23 [1] Bioconductor
BiocGenerics 0.40.0 2021-10-26 [1] Bioconductor
biomaRt 2.50.3 2022-02-06 [1] Bioconductor
Biostrings 2.62.0 2021-10-26 [1] Bioconductor
BioVenn * 1.1.3 2021-06-19 [1] CRAN (R 4.1.0)
bit 4.0.5 2022-11-15 [1] CRAN (R 4.1.2)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.1.0)
bitops 1.0-7 2021-04-24 [1] CRAN (R 4.1.0)
blob 1.2.4 2023-03-17 [1] CRAN (R 4.1.2)
broom 1.0.5 2023-06-09 [1] CRAN (R 4.1.2)
bslib 0.5.1 2023-08-11 [1] CRAN (R 4.1.2)
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.1.2)
Cairo 1.6-1 2023-08-18 [1] CRAN (R 4.1.2)
car 3.1-2 2023-03-30 [1] CRAN (R 4.1.2)
carData 3.0-5 2022-01-06 [1] CRAN (R 4.1.2)
caroline * 0.9.0 2022-11-12 [1] CRAN (R 4.1.2)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.0)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.1.2)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.1.2)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.1.2)
crosstalk 1.2.0 2021-11-04 [1] CRAN (R 4.1.0)
curl 5.2.0 2023-12-08 [1] CRAN (R 4.1.2)
DBI 1.1.3 2022-06-18 [1] CRAN (R 4.1.2)
dbplyr 2.3.3 2023-07-07 [1] CRAN (R 4.1.2)
digest 0.6.33 2023-07-07 [1] CRAN (R 4.1.2)
dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.1.2)
DT * 0.29 2023-08-29 [1] CRAN (R 4.1.2)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
evaluate 0.21 2023-05-05 [1] CRAN (R 4.1.2)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.1.2)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.1.2)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.1.2)
filelock 1.0.2 2018-10-05 [1] CRAN (R 4.1.0)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.1.2)
GenomeInfoDb 1.30.1 2022-01-30 [1] Bioconductor
GenomeInfoDbData 1.2.7 2023-08-29 [1] Bioconductor
ggbeeswarm 0.7.2 2023-04-29 [1] CRAN (R 4.1.2)
ggforce * 0.4.1 2022-10-04 [1] CRAN (R 4.1.2)
ggh4x * 0.2.5 2023-07-15 [1] CRAN (R 4.1.2)
ggplot2 * 3.4.3 2023-08-14 [1] CRAN (R 4.1.2)
ggrastr * 1.0.2 2023-06-01 [1] CRAN (R 4.1.2)
ggsignif * 0.6.4 2022-10-13 [1] CRAN (R 4.1.2)
glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.2)
gtable 0.3.4 2023-08-21 [1] CRAN (R 4.1.2)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.1.2)
htmltools 0.5.6 2023-08-10 [1] CRAN (R 4.1.2)
htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.1.2)
httr 1.4.7 2023-08-15 [1] CRAN (R 4.1.2)
IRanges 2.28.0 2021-10-26 [1] Bioconductor
janitor * 2.2.0 2023-02-02 [1] CRAN (R 4.1.2)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.1.2)
KEGGREST 1.34.0 2021-10-26 [1] Bioconductor
knitr 1.43 2023-05-25 [1] CRAN (R 4.1.2)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.0)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.1.2)
lubridate 1.9.2 2023-02-10 [1] CRAN (R 4.1.2)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.1.2)
MASS 7.3-60 2023-05-04 [1] CRAN (R 4.1.2)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.1.0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
patchwork * 1.1.3 2023-08-14 [1] CRAN (R 4.1.2)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.1.2)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
plotrix 3.8-2 2021-09-08 [1] CRAN (R 4.1.0)
png 0.1-8 2022-11-29 [1] CRAN (R 4.1.2)
polyclip 1.10-4 2022-10-20 [1] CRAN (R 4.1.2)
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
progress 1.2.2 2019-05-16 [1] CRAN (R 4.1.0)
purrr 1.0.2 2023-08-10 [1] CRAN (R 4.1.2)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
rappdirs 0.3.3 2021-01-31 [1] CRAN (R 4.1.0)
Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.1.2)
RCurl 1.98-1.12 2023-03-27 [1] CRAN (R 4.1.2)
readr * 2.1.4 2023-02-10 [1] CRAN (R 4.1.2)
readxl * 1.4.3 2023-07-06 [1] CRAN (R 4.1.2)
rlang 1.1.1 2023-04-28 [1] CRAN (R 4.1.2)
rmarkdown 2.24 2023-08-14 [1] CRAN (R 4.1.2)
RSQLite 2.3.1 2023-04-03 [1] CRAN (R 4.1.2)
rstatix * 0.7.2 2023-02-01 [1] CRAN (R 4.1.2)
rstudioapi 0.15.0 2023-07-07 [1] CRAN (R 4.1.2)
S4Vectors 0.32.4 2022-03-29 [1] Bioconductor
sass 0.4.7 2023-07-15 [1] CRAN (R 4.1.2)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.1.2)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.0)
snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.1.2)
stringi 1.7.12 2023-01-11 [1] CRAN (R 4.1.2)
stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.1.2)
svglite 2.1.1 2023-01-10 [1] CRAN (R 4.1.2)
systemfonts 1.0.4 2022-02-11 [1] CRAN (R 4.1.2)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.1.2)
tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.1.2)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.1.2)
timechange 0.2.0 2023-01-11 [1] CRAN (R 4.1.2)
tweenr 2.0.2 2022-09-06 [1] CRAN (R 4.1.2)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.1.2)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.1.2)
vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.1.2)
vipor 0.4.5 2017-03-22 [1] CRAN (R 4.1.0)
vroom 1.6.3 2023-04-28 [1] CRAN (R 4.1.2)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.2)
xfun 0.40 2023-08-09 [1] CRAN (R 4.1.2)
XML 3.99-0.14 2023-03-19 [1] CRAN (R 4.1.2)
xml2 1.3.5 2023-07-06 [1] CRAN (R 4.1.2)
XVector 0.34.0 2021-10-26 [1] Bioconductor
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.1.2)
zlibbioc 1.40.0 2021-10-26 [1] Bioconductor
[1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
──────────────────────────────────────────────────────────────────────────────